# For a Single Player

In [1]:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time


def get_player_profile(base_search_url, player_name, state, sport, school_keyword):
    """Search MaxPreps for a player and return the profile URLs matching a school.

    Args:
        base_search_url: Search endpoint; the query string is appended to it.
        player_name: Player name sent as the ``q`` query parameter.
        state: Value sent as the ``state`` query parameter.
        sport: Value sent as the ``sport`` query parameter.
        school_keyword: Text that must appear (case-insensitively) in a
            profile link for that link to be kept.

    Returns:
        A list of absolute profile URLs (duplicates removed, page order kept),
        or ``None`` when the request fails or no link matches.
    """
    from urllib.parse import urlencode, urljoin

    # urlencode escapes every reserved character (apostrophes, ampersands,
    # accents), not only spaces, and still renders a space as "+".
    query = urlencode({"q": player_name, "state": state, "sport": sport})
    search_url = f"{base_search_url}?{query}"
    print(f"Searching for player: {search_url}")

    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(search_url, timeout=15)
    except requests.RequestException as exc:
        print(f"Failed to load search page: {exc}")
        return None
    if response.status_code != 200:
        print(f"Failed to load search page. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    keyword = str(school_keyword).lower()
    # Profile paths look like hyphenated slugs (e.g. ".../skyridge-falcons/..."),
    # so a multi-word keyword is also tried in its hyphenated form.
    needles = {keyword, keyword.replace(" ", "-")}

    profiles = []
    for link in soup.find_all('a', href=True):
        href = link.get("href", "")
        href_lower = href.lower()
        if "/athletes/" in href and any(needle in href_lower for needle in needles):
            # urljoin leaves already-absolute hrefs alone and fixes relative ones.
            profiles.append(urljoin("https://www.maxpreps.com", href))

    # The same profile is often linked more than once; keep the first occurrence.
    profiles = list(dict.fromkeys(profiles))
    return profiles if profiles else None


def find_valid_stats_url(profiles, sport="basketball"):
    """Return the first stats-page URL for ``sport`` among the given profiles.

    Each profile page is downloaded and searched for an anchor whose text is
    exactly ``Stats``; the link is accepted only if ``sport`` appears in the
    resulting URL.

    Args:
        profiles: Iterable of profile URLs to try, in order. ``None`` is
            treated as empty.
        sport: Substring that must occur in the stats URL.

    Returns:
        The absolute stats URL, or ``None`` if no profile yields one.
    """
    from urllib.parse import urljoin

    for profile_url in profiles or []:
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(profile_url, timeout=15)
        except requests.RequestException as exc:
            print(f"Failed to load player profile: {profile_url} ({exc})")
            continue
        if response.status_code != 200:
            print(f"Failed to load player profile: {profile_url}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        # href=True skips a "Stats" anchor that has no target instead of
        # raising KeyError on it.
        stats_link = soup.find('a', string='Stats', href=True)
        if stats_link is None:
            continue

        # Resolve against the profile URL so root-relative, relative and
        # absolute hrefs all produce a valid address.
        stats_url = urljoin(profile_url, stats_link['href'])
        if sport in stats_url:
            return stats_url  # First profile with a matching stats page wins

    print(f"No valid stats URL found for {sport}.")
    return None


def _extract_row_stats(rows, min_cells, columns):
    """Read named cells from the first row that has more than ``min_cells`` <td> cells.

    ``columns`` maps an output key to the zero-based cell index to read.
    Returns a dict of stripped cell texts, or ``{}`` when no row is wide enough.
    """
    for row in rows:
        cells = row.find_elements(By.TAG_NAME, 'td')
        if len(cells) > min_cells:
            return {name: cells[idx].text.strip() for name, idx in columns.items()}
    return {}


def fetch_combined_stats(stats_url):
    """Scrape general and shooting stats from a stats page with headless Chrome.

    The page is loaded, the first sufficiently wide table row is read for the
    general stats, the "Shooting" tab is clicked, and the "Shooting (1)" and
    "Shooting (2)" sections are read the same way.

    Args:
        stats_url: URL of the player's stats page.

    Returns:
        A dict with whichever of the keys GP (int), PPG, RPG, APG, SPG, BPG,
        PTS, FGM, FGA, FG%, 3PA, 3PM, 3P% (strings) could be read, or ``None``
        when nothing was read or any step raised.
    """
    options = Options()
    # The `options.headless = True` attribute is deprecated and has no effect
    # on newer Selenium 4 releases; the Chrome switch works regardless.
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)

    try:
        driver.get(stats_url)
        time.sleep(5)  # give the client-side rendering time to finish

        # General stats (GP, PPG, RPG, APG, SPG, BPG)
        rows = driver.find_elements(By.XPATH, '//tbody[contains(@class, "table-body")]/tr')
        general_stats = _extract_row_stats(
            rows, 8, {'GP': 0, 'PPG': 2, 'RPG': 5, 'APG': 6, 'SPG': 7, 'BPG': 8}
        )
        if general_stats:
            # A non-numeric cell raises ValueError, handled by the except below.
            general_stats['GP'] = int(general_stats['GP'])

        # Click on the "Shooting" tab
        shooting_tab = driver.find_element(By.XPATH, '//button[@title="Shooting"]')
        shooting_tab.click()
        time.sleep(3)  # wait for the shooting tables to render

        # NOTE(review): a recorded run returned FGM=136 together with FGA=0 and
        # FG%=0, which suggests the shooting column indices below may not line
        # up with the live tables -- verify against the page before relying on them.

        # Shooting stats (1)
        shooting_1_section = driver.find_element(By.XPATH, '//h2[text()="Shooting (1)"]/following-sibling::div')
        shooting_1_stats = _extract_row_stats(
            shooting_1_section.find_elements(By.XPATH, './/tbody/tr'),
            5,
            {'PTS': 2, 'FGM': 3, 'FGA': 4, 'FG%': 5},
        )

        # Shooting stats (2)
        shooting_2_section = driver.find_element(By.XPATH, '//h2[text()="Shooting (2)"]/following-sibling::div')
        shooting_2_stats = _extract_row_stats(
            shooting_2_section.find_elements(By.XPATH, './/tbody/tr'),
            8,
            {'3PA': 3, '3PM': 4, '3P%': 5},
        )

        # Combine all stats. Return None (not a message string) when empty so
        # callers that test truthiness never mistake a message for a stats dict.
        combined_stats = {**general_stats, **shooting_1_stats, **shooting_2_stats}
        return combined_stats if combined_stats else None

    except Exception as e:
        # Scraping boundary: any failure (missing element, bad number, browser
        # error) is reported and turned into "no stats".
        print(f"An error occurred: {e}")
        return None

    finally:
        driver.quit()


# Main Workflow: run the full lookup for one hard-coded player
base_search_url = "https://www.maxpreps.com/search/"
player_name = "Braden Housley"
state = "ut"
sport = "basketball"
school_keyword = "skyridge"

# Step 1: collect the candidate profile URLs for this player/school
player_profiles = get_player_profile(
    base_search_url=base_search_url,
    player_name=player_name,
    state=state,
    sport=sport,
    school_keyword=school_keyword,
)

if not player_profiles:
    print("Player not found.")
else:
    print(f"Found Player Profiles: {player_profiles}")

    # Step 2: pick the stats page that belongs to the requested sport
    stats_url = find_valid_stats_url(player_profiles, sport)
    if not stats_url:
        print("Unable to find a valid stats URL.")
    else:
        print(f"Stats URL Found: {stats_url}")

        # Step 3: scrape the general and shooting tables
        combined_stats = fetch_combined_stats(stats_url)
        if not combined_stats:
            print("No stats found for the player.")
        else:
            print("Combined Stats (General + Shooting):", combined_stats)


Searching for player: https://www.maxpreps.com/search/?q=Braden+Housley&state=ut&sport=basketball
Found Player Profiles: ['https://www.maxpreps.com/ut/lehi/skyridge-falcons/athletes/braden-housley/?careerid=tls8soeb7pu99']
Stats URL Found: https://www.maxpreps.com/ut/lehi/skyridge-falcons/athletes/braden-housley/basketball/stats/?careerid=tls8soeb7pu99
Combined Stats (General + Shooting): {'GP': 25, 'PPG': '16.4', 'RPG': '3.8', 'APG': '6.8', 'SPG': '2.9', 'BPG': '0.6', 'PTS': '410', 'FGM': '136', 'FGA': '0', 'FG%': '0', '3PA': '44', '3PM': '0', '3P%': '0'}


# For All Players in a CSV File

In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time


def get_player_profile(base_search_url, player_name, state, sport, school_keyword):
    """Search MaxPreps for a player and return the profile URLs matching a school.

    Args:
        base_search_url: Search endpoint; the query string is appended to it.
        player_name: Player name sent as the ``q`` query parameter.
        state: Value sent as the ``state`` query parameter.
        sport: Value sent as the ``sport`` query parameter.
        school_keyword: Text that must appear (case-insensitively) in a
            profile link for that link to be kept.

    Returns:
        A list of absolute profile URLs (duplicates removed, page order kept),
        or ``None`` when the request fails or no link matches.
    """
    from urllib.parse import urlencode, urljoin

    # urlencode escapes every reserved character (apostrophes, ampersands,
    # accents), not only spaces, and still renders a space as "+".
    query = urlencode({"q": player_name, "state": state, "sport": sport})
    search_url = f"{base_search_url}?{query}"
    print(f"Searching for player: {search_url}")

    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(search_url, timeout=15)
    except requests.RequestException as exc:
        print(f"Failed to load search page: {exc}")
        return None
    if response.status_code != 200:
        print(f"Failed to load search page. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    keyword = str(school_keyword).lower()
    # Profile paths look like hyphenated slugs (e.g. ".../skyridge-falcons/..."),
    # so a multi-word keyword is also tried in its hyphenated form.
    needles = {keyword, keyword.replace(" ", "-")}

    profiles = []
    for link in soup.find_all('a', href=True):
        href = link.get("href", "")
        href_lower = href.lower()
        if "/athletes/" in href and any(needle in href_lower for needle in needles):
            # urljoin leaves already-absolute hrefs alone and fixes relative ones.
            profiles.append(urljoin("https://www.maxpreps.com", href))

    # The same profile is often linked more than once; keep the first occurrence.
    profiles = list(dict.fromkeys(profiles))
    return profiles if profiles else None


def find_valid_stats_url(profiles, sport="basketball"):
    """Return the first stats-page URL for ``sport`` among the given profiles.

    Each profile page is downloaded and searched for an anchor whose text is
    exactly ``Stats``; the link is accepted only if ``sport`` appears in the
    resulting URL.

    Args:
        profiles: Iterable of profile URLs to try, in order. ``None`` is
            treated as empty.
        sport: Substring that must occur in the stats URL.

    Returns:
        The absolute stats URL, or ``None`` if no profile yields one.
    """
    from urllib.parse import urljoin

    for profile_url in profiles or []:
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(profile_url, timeout=15)
        except requests.RequestException as exc:
            print(f"Failed to load player profile: {profile_url} ({exc})")
            continue
        if response.status_code != 200:
            print(f"Failed to load player profile: {profile_url}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        # href=True skips a "Stats" anchor that has no target instead of
        # raising KeyError on it.
        stats_link = soup.find('a', string='Stats', href=True)
        if stats_link is None:
            continue

        # Resolve against the profile URL so root-relative, relative and
        # absolute hrefs all produce a valid address.
        stats_url = urljoin(profile_url, stats_link['href'])
        if sport in stats_url:
            return stats_url  # First profile with a matching stats page wins

    print(f"No valid stats URL found for {sport}.")
    return None


def _extract_row_stats(rows, min_cells, columns):
    """Read named cells from the first row that has more than ``min_cells`` <td> cells.

    ``columns`` maps an output key to the zero-based cell index to read.
    Returns a dict of stripped cell texts, or ``{}`` when no row is wide enough.
    """
    for row in rows:
        cells = row.find_elements(By.TAG_NAME, 'td')
        if len(cells) > min_cells:
            return {name: cells[idx].text.strip() for name, idx in columns.items()}
    return {}


def fetch_combined_stats(stats_url):
    """Scrape general and shooting stats from a stats page with headless Chrome.

    The page is loaded, the first sufficiently wide table row is read for the
    general stats, the "Shooting" tab is clicked, and the "Shooting (1)" and
    "Shooting (2)" sections are read the same way.

    Args:
        stats_url: URL of the player's stats page.

    Returns:
        A dict with whichever of the keys GP (int), PPG, RPG, APG, SPG, BPG,
        PTS, FGM, FGA, FG%, 3PA, 3PM, 3P% (strings) could be read, or ``None``
        when nothing was read or any step raised.
    """
    options = Options()
    # The `options.headless = True` attribute is deprecated and has no effect
    # on newer Selenium 4 releases; the Chrome switch works regardless.
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)

    try:
        driver.get(stats_url)
        time.sleep(5)  # give the client-side rendering time to finish

        # General stats (GP, PPG, RPG, APG, SPG, BPG)
        rows = driver.find_elements(By.XPATH, '//tbody[contains(@class, "table-body")]/tr')
        general_stats = _extract_row_stats(
            rows, 8, {'GP': 0, 'PPG': 2, 'RPG': 5, 'APG': 6, 'SPG': 7, 'BPG': 8}
        )
        if general_stats:
            # A non-numeric cell raises ValueError, handled by the except below.
            general_stats['GP'] = int(general_stats['GP'])

        # Click on the "Shooting" tab
        shooting_tab = driver.find_element(By.XPATH, '//button[@title="Shooting"]')
        shooting_tab.click()
        time.sleep(3)  # wait for the shooting tables to render

        # NOTE(review): a recorded run returned FGM=136 together with FGA=0 and
        # FG%=0, which suggests the shooting column indices below may not line
        # up with the live tables -- verify against the page before relying on them.

        # Shooting stats (1)
        shooting_1_section = driver.find_element(By.XPATH, '//h2[text()="Shooting (1)"]/following-sibling::div')
        shooting_1_stats = _extract_row_stats(
            shooting_1_section.find_elements(By.XPATH, './/tbody/tr'),
            5,
            {'PTS': 2, 'FGM': 3, 'FGA': 4, 'FG%': 5},
        )

        # Shooting stats (2)
        shooting_2_section = driver.find_element(By.XPATH, '//h2[text()="Shooting (2)"]/following-sibling::div')
        shooting_2_stats = _extract_row_stats(
            shooting_2_section.find_elements(By.XPATH, './/tbody/tr'),
            8,
            {'3PA': 3, '3PM': 4, '3P%': 5},
        )

        # Combine all stats. Return None (not a message string) when empty so
        # callers that test truthiness never mistake a message for a stats dict.
        combined_stats = {**general_stats, **shooting_1_stats, **shooting_2_stats}
        return combined_stats if combined_stats else None

    except Exception as e:
        # Scraping boundary: any failure (missing element, bad number, browser
        # error) is reported and turned into "no stats".
        print(f"An error occurred: {e}")
        return None

    finally:
        driver.quit()


# Load cleaned player data
player_data = pd.read_csv("Utah_state_Womens.csv")

# Output layout: the input columns followed by the scraped stat columns
# (dict.fromkeys drops a stat name that already exists in the input).
stat_columns = ["GP", "PPG", "RPG", "APG", "SPG", "BPG", "PTS", "FGM", "FGA", "FG%", "3PA", "3PM", "3P%"]
output_columns = list(dict.fromkeys(player_data.columns.tolist() + stat_columns))

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; concatenating onto a DataFrame inside the loop re-copies every
# previous row on each iteration.
records = []

# Iterate through each player and fetch stats
base_search_url = "https://www.maxpreps.com/search/"
for index, row in player_data.iterrows():
    player_name = row["Name"]
    state = row["State"]
    school_keyword = row["School"]

    print(f"Processing Player: {player_name}, State: {state}, School Keyword: {school_keyword}")

    # Blank CSV cells are read as NaN (a float), which the search helper
    # cannot treat as text.
    if pd.isna(player_name) or pd.isna(school_keyword):
        print("Skipping row with missing name or school.")
        continue

    try:
        # Step 1: Get player profile URLs
        player_profiles = get_player_profile(base_search_url, player_name, state, "basketball", school_keyword)
        if not player_profiles:
            print("Player not found.")
            continue
        print(f"Found Player Profiles: {player_profiles}")

        # Step 2: Find the valid stats URL for basketball
        stats_url = find_valid_stats_url(player_profiles, "basketball")
        if not stats_url:
            print("No valid stats URL found.")
            continue
        print(f"Stats URL Found: {stats_url}")

        # Step 3: Fetch combined stats
        combined_stats = fetch_combined_stats(stats_url)
    except Exception as exc:
        # Batch boundary: one failing player (network error, browser crash)
        # must not discard the results already gathered for the others.
        print(f"Skipping {player_name} after error: {exc}")
        continue

    # Only a non-empty dict is real data; a None or message-string result
    # would make the ** unpacking below raise TypeError.
    if isinstance(combined_stats, dict) and combined_stats:
        records.append({**row.to_dict(), **combined_stats})
    else:
        print("No stats found for the player.")

# Build the result table in one step
players_with_stats = pd.DataFrame(records, columns=output_columns)

# Display the resulting DataFrame
print(players_with_stats)

# Save to CSV
players_with_stats.to_csv("Utah_state_Womens_Stats.csv", index=False)
