Import statements

In [1]:
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd
from io import StringIO

In [2]:
def get_team_codes():
    """Returns a list of all current NBA team codes"""
    return [
        'ATL', 'BOS', 'BRK', 'CHI', 'CHO', 'CLE', 'DAL', 'DEN', 'DET', 'GSW',
        'HOU', 'IND', 'LAC', 'LAL', 'MEM', 'MIA', 'MIL', 'MIN', 'NOP', 'NYK',
        'OKC', 'ORL', 'PHI', 'PHO', 'POR', 'SAC', 'SAS', 'TOR', 'UTA', 'WAS'
    ]

In [3]:
def scrape_team_advanced_gamelog(team_code, year=2024):
    """
    Scrapes advanced game log for a specific team and year.
    
    Args:
        team_code (str): Team code (e.g., 'CHO' for Charlotte).
        year (int): Season year (e.g., 2024 for the 2023-24 season).
    
    Returns:
        pandas.DataFrame: Cleaned advanced game log data, or None if scraping fails.
    """
    url = f"https://www.basketball-reference.com/teams/{team_code}/{year}/gamelog-advanced/"
    
    # Add a polite delay to avoid overloading the server
    time.sleep(5)
    
    try:
        # Fetch the page
        response = requests.get(url)
        response.raise_for_status()
        
        # Check if the page contains valid data
        if "Page Not Found" in response.text:
            print(f"Warning: No data found for {team_code} {year}")
            return None
        
        # Parse the HTML and extract the advanced game log table
        soup = BeautifulSoup(response.text, 'html.parser')
        table = soup.find('table', id='tgl_advanced')
        
        if table is None:
            print(f"Warning: No table found for {team_code} {year}")
            return None
        
        # Convert the HTML table to a DataFrame
        df = pd.read_html(StringIO(str(table)))[0]
        
        return df
    
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred for {team_code}: {http_err}")
    except requests.exceptions.ConnectionError as conn_err:
        print(f"Connection error occurred for {team_code}: {conn_err}")
    except Exception as err:
        print(f"An error occurred for {team_code}: {err}")
    
        return None


In [4]:
# Scrape regular season stats
cho_2024 = scrape_team_advanced_gamelog('CHO')

# Set column names to the stats they represent
cho_2024.columns = cho_2024.columns.get_level_values(1)

# Make Game Number (G) the index
cho_2024.set_index('G', inplace=True)

# Drop unneccessary columns
cho_2024.drop(columns=['Rk'], inplace=True)
cho_2024.drop(columns=['Unnamed: 18_level_1'], inplace=True)
cho_2024.drop(columns=['Unnamed: 23_level_1'], inplace=True)

# Rename columns
cho_2024.rename(columns={'Unnamed: 3_level_1': 'Home/Away'}, inplace=True)
cho_2024.rename(columns={'Tm': 'PtsFwd'}, inplace=True)
cho_2024.rename(columns={'Opp': 'PtsAgt'}, inplace=True)

# Drop extraneous rows that do not house game info
cho_2024 = cho_2024[cho_2024.index.notnull()]  # Keep only non-NaN indices (ie rows without the 3 collective headers)
cho_2024 = cho_2024[cho_2024.index != "G"]     # Keep only rows wiht games (ie rows without individual stat headers)

# Edit values in Home/Away column to be H and A
cho_2024['Home/Away'] = cho_2024['Home/Away'].fillna('H')
cho_2024['Home/Away'] = cho_2024['Home/Away'].replace('@', 'A')


In [5]:
cho_2024

Unnamed: 0_level_0,Date,Home/Away,PtsAgt,W/L,PtsFwd,PtsAgt,ORtg,DRtg,Pace,FTr,...,STL%,BLK%,eFG%,TOV%,ORB%,FT/FGA,eFG%,TOV%,DRB%,FT/FGA
G,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2023-10-25,H,ATL,W,116,110,112.2,106.4,103.4,.302,...,4.8,4.7,.564,16.3,28.6,.221,.446,10.0,76.5,.290
2,2023-10-27,H,DET,L,99,111,92.3,103.5,107.2,.386,...,12.1,12.1,.415,14.9,21.2,.295,.511,19.5,71.4,.239
3,2023-10-30,H,BRK,L,121,133,116.2,127.7,104.2,.208,...,7.7,9.3,.531,8.7,18.2,.198,.632,12.2,75.6,.264
4,2023-11-01,A,HOU,L,119,128,117.7,126.6,101.1,.278,...,8.9,17.0,.561,13.7,32.6,.200,.649,15.3,80.6,.226
5,2023-11-04,A,IND,W,125,124,127.4,126.4,98.1,.261,...,5.1,20.8,.580,11.7,32.4,.261,.602,13.0,72.5,.129
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,2024-04-07,H,OKC,L,118,121,119.6,122.7,98.6,.169,...,12.2,17.6,.645,16.8,21.2,.133,.621,15.2,74.4,.149
79,2024-04-09,H,DAL,L,104,130,103.7,129.6,100.3,.236,...,8.0,7.0,.494,10.9,17.0,.180,.629,12.8,66.0,.140
80,2024-04-10,A,ATL,W,115,114,118.5,117.5,97.0,.278,...,11.3,4.8,.595,13.6,13.9,.266,.634,16.8,80.0,.122
81,2024-04-12,A,BOS,L,98,131,99.8,133.4,98.2,.289,...,3.1,11.1,.520,18.9,9.8,.250,.587,5.2,64.4,.087
