In [7]:
import pandas as pd
import requests
from io import StringIO

# Show every column when a DataFrame is rendered; the scraped table is wide
# and would otherwise be truncated with "..." in the middle.
pd.options.display.max_columns = None


In [12]:
def nst_player_on_ice_scraper(fromseason, thruseason, startdate, enddate, stype=2, sit='5v5', timeout=30):
    """
    Fetch the player table from Natural Stat Trick's ``playerteams.php`` report.

    The request is best-effort: any failure (HTTP error status, network
    problem, parsing problem) is printed and ``None`` is returned instead of
    an exception being propagated to the caller.

    Parameters:
        fromseason (int): The starting season in the format YYYYYYYY (e.g., 20242025).
        thruseason (int): The ending season in the format YYYYYYYY (e.g., 20242025).
        startdate (str): The start date in the format YYYY-MM-DD (e.g., 2024-10-08).
        enddate (str): The end date in the format YYYY-MM-DD (e.g., 2024-10-14).
        stype (int, optional): Type of statistics to retrieve. Defaults to 2 (regular season).
        sit (str, optional): Situation type to filter by (e.g., '5v5'). Defaults to '5v5'.
        timeout (float or tuple, optional): Timeout in seconds handed to
            ``requests.get`` so a stalled server cannot hang the notebook.
            Defaults to 30.

    Returns:
        pandas.DataFrame or None: The first HTML table found on the page, with
        the ``'Unnamed: 0'`` column removed (if present) and column names
        lower-cased with spaces replaced by underscores. ``None`` when the
        request or parsing fails, or when no table is found.

    Raises:
        Nothing is raised for request/parsing failures; they are caught,
        reported via ``print`` and signalled by the ``None`` return value.
    """
    base_url = "https://www.naturalstattrick.com/playerteams.php"

    # Handing the query to requests as a dict (instead of interpolating into the
    # URL string) makes requests URL-encode the caller-supplied values. Key order
    # matches the original hand-built query string.
    params = {
        "fromseason": fromseason,
        "thruseason": thruseason,
        "stype": stype,
        "sit": sit,
        "score": "all",
        "stdoi": "std",
        "rate": "n",
        "team": "ALL",
        "pos": "S",
        "loc": "B",
        "toi": 0,
        "gpfilt": "gpdate",
        "fd": startdate,
        "td": enddate,
        "tgp": 410,
        "lines": "single",
        "draftteam": "ALL",
    }

    try:
        # Without an explicit timeout requests waits forever on a silent server.
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()  # Raises HTTPError for bad responses

        # read_html expects a file-like object for literal HTML text
        html_content = StringIO(response.text)

        # Parse all tables from the HTML content using the 'lxml' parser
        tables = pd.read_html(html_content, flavor='lxml')

        if not tables:
            print("No tables found on the webpage.")
            return None

        # Assumes the first table on the page is the player table -- TODO confirm
        # if the site layout changes.
        df = tables[0].drop(columns=['Unnamed: 0'], errors='ignore')
        df.columns = df.columns.str.lower().str.replace(' ', '_')
        return df

    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")  # HTTP error
    except Exception as err:
        # Deliberate best-effort: report and fall through to the None return
        print(f"An error occurred: {err}")  # Other errors

    # Explicit failure signal for callers (previously an implicit None)
    return None

In [13]:
# Pull the default (stype=2, 5v5) player table for the 2024-25 season,
# limited to games played between 2024-10-08 and 2024-10-14.
df = nst_player_on_ice_scraper(
    fromseason=20242025,
    thruseason=20242025,
    startdate='2024-10-08',
    enddate='2024-10-14',
)
df

Unnamed: 0,player,team,position,gp,toi,goals,total_assists,first_assists,second_assists,total_points,ipp,shots,sh%,ixg,icf,iff,iscf,ihdcf,rush_attempts,rebounds_created,pim,total_penalties,minor,major,misconduct,penalties_drawn,giveaways,takeaways,hits,hits_taken,shots_blocked,faceoffs_won,faceoffs_lost,faceoffs_%
0,Ryan Suter,STL,D,3,42.766667,1,0,0,0,1,25.00,5,20.00,0.14,12,6,3,1,0,1,0,0,0,0,0,0,2,0,0,1,2,0,0,-
1,Brent Burns,CAR,D,1,12.166667,0,0,0,0,0,0.00,1,0.00,0.05,4,2,0,0,0,0,2,1,1,0,0,0,1,0,0,0,0,0,0,-
2,Corey Perry,EDM,R,3,30.800000,1,0,0,0,1,100.00,5,20.00,0.92,9,9,6,4,0,0,0,0,0,0,0,0,0,1,1,3,1,0,0,-
3,Alex Ovechkin,WSH,L,1,14.400000,0,1,0,1,1,100.00,0,-,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,-
4,Evgeni Malkin,PIT,C,4,55.416667,0,4,4,0,4,100.00,3,0.00,0.57,10,5,5,3,0,2,0,0,0,0,0,0,1,0,2,3,2,17,21,44.74
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,Uvis Balinskis,FLA,D,4,48.000000,0,1,0,1,1,100.00,2,0.00,0.04,4,2,1,0,0,0,2,1,1,0,0,0,1,1,3,8,1,0,0,-
610,Matvei Michkov,PHI,R,2,25.533333,0,0,0,0,0,-,2,0.00,0.20,6,5,1,0,0,1,2,1,1,0,0,1,1,0,0,1,2,0,0,-
611,Jett Luchanko,PHI,C,1,9.650000,0,0,0,0,0,-,1,0.00,0.23,2,2,1,1,0,0,0,0,0,0,0,0,1,0,0,1,0,3,3,50.00
612,Macklin Celebrini,S.J,C,1,15.016667,1,1,1,0,2,100.00,1,100.00,0.02,1,1,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,1,11,8.33
