In [1]:
import pandas as pd
import numpy as np
import requests
import re
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
import time

In [2]:
# Setup ChromeDriver with Options
options = Options()
options.add_argument("--start-maximized")  # Optional
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
wait = WebDriverWait(driver, 10)

In [3]:
# Step 1: Scrape team names and URLs
base_url = "https://www.ncaa.com/stats/softball/d3/current/team/320" # change URL for different divisions
# better case would be to add another parameter to below code, use curly braces where "d_" is as a filler and update below
all_teams = []
max_pages_d1_d2 = 6
max_pages_d3 = 8
page = 1

In [4]:
while page <= max_pages_d3:
    # Pagination uses /p2, /p3, etc.
    page_suffix = f"/p{page}" if page > 0 else ""
    page_url = base_url + page_suffix
    print(f"🔗 Loading page: {page_url}")

    try:
        driver.get(page_url)
        #wait.until(EC.presence_of_all_elements_located((By.XPATH, "//table//tbody/tr")))
        rows = driver.find_elements(By.XPATH, "//table//tbody/tr")

        if not rows:
            print("⚠️ No more rows. Stopping.")
            break

        for row in rows:
            try:
                team_cell = row.find_element(By.XPATH, "./td/a")
                team_name = team_cell.text.strip()
                team_url = team_cell.get_attribute("href")

                all_teams.append({
                    "Team Name": team_name,
                    "Team URL": team_url
                })
            except Exception as e:
                print("❌ Error extracting team row:", e)
                continue

        page += 1

    except Exception as e:
        print(f"❌ Error on page {page_url}: {e}")
        break

🔗 Loading page: https://www.ncaa.com/stats/softball/d3/current/team/320/p1
🔗 Loading page: https://www.ncaa.com/stats/softball/d3/current/team/320/p2
🔗 Loading page: https://www.ncaa.com/stats/softball/d3/current/team/320/p3
🔗 Loading page: https://www.ncaa.com/stats/softball/d3/current/team/320/p4
🔗 Loading page: https://www.ncaa.com/stats/softball/d3/current/team/320/p5
🔗 Loading page: https://www.ncaa.com/stats/softball/d3/current/team/320/p6
🔗 Loading page: https://www.ncaa.com/stats/softball/d3/current/team/320/p7
🔗 Loading page: https://www.ncaa.com/stats/softball/d3/current/team/320/p8


In [5]:
# Step 2: Visit each team’s page to get city and state
for team in all_teams:
    try:
        response = requests.get(team["Team URL"], timeout=2)
        soup = BeautifulSoup(response.content, "html.parser")

        location_tag = soup.find(class_="division-location")
        if not location_tag:
            raise ValueError("Location not found")

        location = location_tag.text.strip()
        if "," in location:
            city, state = map(str.strip, location.split(",", 1))
        else:
            city, state = location, ""

        team["City"] = city
        team["State"] = state
        print(f"✅ {team['Team Name']}: {city}, {state}")
    except Exception as e:
        print(f"❌ Failed for {team['Team Name']}: {e}")
        team["City"] = "N/A"
        team["State"] = "N/A"


# for team in all_teams:
#     try:
#         driver.get(team["Team URL"])
#         #wait.until(EC.presence_of_element_located((By.CLASS_NAME, "division-location")))
#         location = driver.find_element(By.CLASS_NAME, "division-location").text.strip()

#         if "," in location:
#             city, state = map(str.strip, location.split(",", 1))
#         else:
#             city, state = location, ""

#         team["City"] = city
#         team["State"] = state
#         print(f"✅ {team['Team Name']}: {city}, {state}")
#     except Exception as e:
#         print(f"❌ Failed for {team['Team Name']}: {e}")
#         team["City"] = "N/A"
#         team["State"] = "N/A"

✅ Rowan: Division III -
                    
                    Glassboro, NJ
✅ Trine: Division III -
                    
                    Angola, IN
✅ Texas Lutheran: Division III -
                    
                    Seguin, TX
✅ East Tex. Baptist: Division III -
                    
                    Marshall, TX
✅ Va. Wesleyan: Division III -
                    
                    Virginia Beach, VA
✅ Linfield: Division III -
                    
                    McMinnville, OR
✅ Saint Mary's (MN): Division III -
                    
                    Winona, MN
✅ Chris. Newport: Division III -
                    
                    Newport News, VA
✅ Greenville: Division III -
                    
                    Greenville, IL
✅ Huntingdon: Division III -
                    
                    Montgomery, AL
✅ Baldwin Wallace: Division III -
                    
                    Berea, OH
✅ Gettysburg: Division III -
                    
           

In [6]:
# Save to CSV
df = pd.DataFrame(all_teams)
df.to_csv("d3_teams_towns.csv", index=False) # Change for different divisions

driver.quit()