In [183]:
import requests
from bs4 import BeautifulSoup

In [184]:
def player_scrape_header_info(url):
    res = requests.get(url)
    soup = BeautifulSoup(res.text, "html.parser")

    data = {}

    jersey_tag = soup.find("span", class_="sidearm-roster-player-jersey-number")
    if jersey_tag:
        data["Jersey Number"] = jersey_tag.get_text(strip=True)

    # First & Last Name
    first_name = soup.find("span", class_="sidearm-roster-player-first-name")
    last_name = soup.find("span", class_="sidearm-roster-player-last-name")
    if first_name and last_name:
        data["First Name"] = first_name.get_text(strip=True)
        data["Last Name"] = last_name.get_text(strip=True)
        data["Full Name"] = f"{data['First Name']} {data['Last Name']}"

    # Image URL
    image_div = soup.find("div", class_="sidearm-roster-player-image")
    if image_div:
        img_tag = image_div.find("img")
        if img_tag and img_tag.get("src"):
            data["Image URL"] = img_tag["src"]

    # Social links (Instagram, NIL)
    social_links = soup.find_all("a", class_="sidearm-roster-player-social-link")
    for link in social_links:
        href = link.get("href", "")
        if "instagram.com" in href:
            data["Instagram URL"] = href
        elif "opndr.se" in href:
            data["NIL URL"] = href

    # Player metadata from the fields section
    field_items = soup.select("div.sidearm-roster-player-fields li")
    for item in field_items:
        label_span = item.find("span", class_="sidearm-roster-player-field-label")
        value_span = label_span.find_next_sibling("span") if label_span else None
        if label_span and value_span:
            key = label_span.get_text(strip=True)
            value = value_span.get_text(strip=True)
            data[key] = value

    stats_section = soup.find("div", id="sidearm-roster-player-stats")
    if stats_section:
        table = stats_section.find("table")
        if table:
            headers = [th.get_text(strip=True) for th in table.find_all("th")]
            rows = []
            for row in table.find_all("tr")[1:]:  # skip header
                cells = [td.get_text(strip=True) for td in row.find_all("td")]
                if cells:
                    row_dict = dict(zip(headers, cells))
                    rows.append(row_dict)
            data["Stats"] = rows

    return data


In [185]:
player_scrape_header_info('https://fightingillini.com/sports/mens-basketball/roster/carey-booth/14522')

{'Jersey Number': '0',
 'First Name': 'Carey',
 'Last Name': 'Booth',
 'Full Name': 'Carey Booth',
 'Image URL': 'https://d1iubivivot1gj.cloudfront.net/images/2024/9/19/Booth_Carey_WEB_20240916_MBB_Headshot_KS_0714.jpg?width=300',
 'Instagram URL': 'https://www.instagram.com/careybooth',
 'NIL URL': 'https://opndr.se/carey-booth',
 'Class': 'Sophomore',
 'Height': '6-10',
 'Weight': '215',
 'Major': 'Communication',
 'Hometown': 'Englewood, Colo.',
 'High School': 'Brewster Academy (N.H.)',
 'Prev School': 'Notre Dame',
 'Instagram': 'careybooth'}

In [186]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_season_stats_w_players(season="2023-24"):
    url = f"https://fightingillini.com/sports/mens-basketball/stats/{season}"
    res = requests.get(url)
    if res.status_code != 200:
        print(f"Failed for {season}")
        return None, None

    soup = BeautifulSoup(res.text, "html.parser")

    # Get team stats section
    team_section = soup.find("section", id="team")
    if team_section is None:
        print(f"Could not find team stats section for {season}")
        return None, None
    team_table = team_section.find("table")
    team_stats = extract_stat_table(team_table, season)

    # Get player stats section
    player_section = soup.find("section", id="individual-overall")
    if player_section is None:
        print(f"Could not find player stats section for {season}")
        return team_stats, None
    player_table = player_section.find("table")
    player_stats = extract_player_table(player_table, season)

    return team_stats, player_stats


def extract_stat_table(table, season):
    rows = []
    for tr in table.find_all("tr")[1:]:  # skip header row
        tds = tr.find_all("td")
        if len(tds) < 3:
            continue  # skip malformed rows

        # First column = stat label
        stat_span = tds[0].find("span", class_="hide-on-small-down")
        stat = stat_span.get_text(strip=True) if stat_span else tds[0].get_text(strip=True)

        illinois = tds[1].get_text(strip=True) if len(tds) > 1 else None
        opponents = tds[2].get_text(strip=True) if len(tds) > 2 else None

        rows.append({
            "Statistic": stat,
            "Illinois": illinois,
            "Opponents": opponents,
            "Season": season
        })

    return pd.DataFrame(rows)


def extract_player_table(table, season):
    headers = [th.get_text(strip=True) for th in table.find("thead").find_all("th")]
    rows = []

    for tr in table.find("tbody").find_all("tr"):
        tds = tr.find_all("td")
        if not tds or len(tds) < 2:
            continue

        row_data = {}
        for i in range(min(len(headers), len(tds))):
            header = headers[i]

            if header == "Player":
                name_tag = tds[i].find("a")
                raw_name = name_tag.get_text(strip=True) if name_tag else tds[i].get_text(strip=True)

                # Convert "Last, First" → "First Last"
                if "," in raw_name:
                    last, first = raw_name.split(",", 1)
                    formatted_name = first.strip() + " " + last.strip()
                else:
                    formatted_name = raw_name.strip()

                row_data["Player"] = formatted_name
            else:
                row_data[header] = tds[i].get_text(strip=True)

        row_data["Season"] = season
        rows.append(row_data)

    return pd.DataFrame(rows)

In [208]:
team_df, players_df = scrape_season_stats_w_players("2023-24")

print("🏀 Team Stats:")
print(team_df)

print("\n👤 Player Stats:")
print(players_df)

🏀 Team Stats:
                Statistic   Illinois  Opponents   Season
0            Total Points       3168       2787  2023-24
1         Points Per Game       83.4       73.3  2023-24
2          Scoring Margin       10.0         --  2023-24
3      FG: Made-Attempted  1104-2356  1044-2418  2023-24
4          FG: Percentage       .469       .432  2023-24
5            FG: Per Game       29.1       27.5  2023-24
6     3PT: Made-Attempted    317-908    233-684  2023-24
7         3PT: Percentage       .349       .341  2023-24
8           3PT: Per Game        8.3        6.1  2023-24
9      FT: Made-Attempted    643-874    466-646  2023-24
10         FT: Percentage       .736       .721  2023-24
11           FT: Per Game       16.9       12.3  2023-24
12                  Total       1554       1259  2023-24
13               Per Game       40.9       33.1  2023-24
14                 Margin        7.8         --  2023-24
15                  Total        506        458  2023-24
16               

In [207]:
stats = ['GP', 'GS', 'Minutes', 'FG', '3PT', 'FT', 'Scoring', 'Rebounds', 'PF', 'AST', 'TO', 'STL', 'BLK', 'TOT', 'AVG', 'FGM', 'FGA', 'FG%', '3PTA', '3PT%', 'FTM', 'FTA', 'FT%']
players_df[stats] = players_df[stats].apply(pd.to_numeric, errors='coerce')
players_df[stats]

Unnamed: 0,GP,GS,Minutes,FG,3PT,FT,Scoring,Rebounds,PF,AST,...,TOT,AVG,FGM,FGA,FG%,3PTA,3PT%,FTM,FTA,FT%
0,32,31,1086,33.9,71,461,0.475,77,213,0.362,...,23.0,26,102,128,4.0,73,64,33,28,
1,38,38,1313,34.6,50,477,0.453,41,138,0.297,...,15.9,42,148,190,5.0,150,89,14,13,
2,35,35,1107,31.6,91,319,0.451,59,160,0.369,...,12.1,48,164,212,6.1,95,55,51,37,
3,38,38,915,24.1,70,267,0.476,43,115,0.374,...,9.6,65,167,232,6.1,11,34,10,12,
4,38,38,866,22.8,66,186,0.527,0,0,0.0,...,6.2,87,114,201,5.3,76,48,17,15,
5,38,3,404,10.6,41,151,0.669,0,0,0.0,...,6.1,57,79,136,3.6,9,26,7,22,
6,38,7,767,20.2,70,179,0.397,61,157,0.389,...,5.7,40,95,135,3.6,22,19,10,5,
7,38,0,711,18.7,60,182,0.407,22,72,0.306,...,5.6,35,69,104,2.7,43,25,16,6,
8,28,0,198,7.1,27,76,0.342,8,36,0.222,...,2.4,9,17,26,0.9,10,10,2,2,
9,19,0,143,7.5,22,42,0.452,4,13,0.308,...,2.4,11,29,40,2.1,6,2,4,3,


In [204]:
print("Actual columns in DataFrame:", list(players_df.columns))


Actual columns in DataFrame: ['#', 'Player', 'GP', 'GS', 'Minutes', 'FG', '3PT', 'FT', 'Scoring', 'Rebounds', 'PF', 'AST', 'TO', 'STL', 'BLK', 'Bio Link', 'TOT', 'AVG', 'FGM', 'FGA', 'FG%', '3PTA', '3PT%', 'FTM', 'FTA', 'FT%', 'Season']
