In [None]:
# This notebook is a self-contained set of functions for accessing player
# information from the NHL API

import pandas as pd
import urllib3
import json
import os
from jsonpath_ng import jsonpath, parse

# You will note that most of the functions start with a double underscore.
# This loosely indicates that they are private, and shouldn't be called by
# others. However, Python doesn't strictly enforce this requirement.

def __rip_nhl_player_ids(season:str ='20172018') -> pd.DataFrame:
    '''Downloads the team rosters for a given season from the NHL API. Season should be in
    the form of YYYYyyyy where the first is the starting year of the season and the second
    is the ending year.

    :param season: The season to receive data from
    :return: A DataFrame holding the fields of every 'person' object found in the response
        (presumably the player id, full name and API link - verify against the API), plus a
        position_code column built from every 'code' value found in the response
    '''
    pool = urllib3.PoolManager()
    response = pool.request(
        'GET',
        f'https://statsapi.web.nhl.com/api/v1/teams?expand=team.roster&season={season}',
    )
    payload = json.loads(response.data)

    def collect(expression):
        # One DataFrame row per jsonpath match, in the order the matches are found
        return pd.DataFrame([hit.value for hit in parse(expression).find(payload)])

    people = collect('$..person')
    positions = collect('$..code').rename(columns={0: "position_code"})

    # The two frames are paired purely by row position (index), so this relies on
    # the person and code matches coming back in the same order
    return people.merge(positions, how="inner", left_index=True, right_index=True)

# In addition to getting a list of the players and their positions, we
# also need to get information about their stats. This is a second API call
# that we need to make for each player in each season. That's a lot of API
# calls!

def __rip_nhl_player_data(player_id:str, season_id:str) -> pd.Series:
    '''Downloads the single season statistics of one player from the NHL API.

    :param player_id: The stats API id of the player
    :param season_id: The season to receive data from in YYYYyyyy format
    :return: A Series built from the first 'stat' object found in the response, or None
        (after printing a message) when the response contains no 'stat' object
    '''
    player_url=f'https://statsapi.web.nhl.com/api/v1/people/{player_id}/stats?stats=statsSingleSeason&season={season_id}'
    http = urllib3.PoolManager()
    r = http.request('GET', player_url)
    json_data=json.loads(r.data)

    matches = parse('$..stat').find(json_data)
    try:
        # Only the first match is used; an empty match list means no stats came back
        first_match = matches[0]
    except IndexError:
        # Best effort: report the missing player and keep going rather than abort
        # the whole season download. Only the "no match" case is caught, so
        # unrelated errors (and Ctrl-C) are no longer silently swallowed.
        print( f"Player {player_id} from season {season_id} failed to load.")
        return None
    return pd.Series(first_match.value)

# Now we need a helper function to map those two calls together into one dataset

def __get_player_data(row, season_id):
    '''Combines one roster row with the season statistics of the player in that row.

    :param row: A roster row; its "id" entry is used as the player id
    :param season_id: The season to receive data from in YYYYyyyy format
    :return: The player's statistics followed by the entries of row
    '''
    stats = __rip_nhl_player_data(row["id"], season_id)
    # NOTE(review): stats is None when the player failed to load; pandas.concat is
    # documented to drop None entries, so the row alone should come back - confirm
    return pd.concat([stats, row])

# And this is our only public function, which returns all of the stats for
# players in a given season, optionally saving them to a file.
def get_player_stats_by_season(season:str ='20172018', save:bool=True, save_dir:str='.') -> pd.DataFrame:
    '''Returns a DataFrame of statistics by player for a given season
    :param season: The season to receive data from in YYYYyyyy format
    :param save: Optional - whether to save the results to a csv file or not, default to True
    :param save_dir: Optional - where to save the result csv to, defaults to '.'
    :return: A DataFrame of statistics for each player
    '''
    # First the roster, then one extra API call per roster row for the statistics
    df=__rip_nhl_player_ids(season)
    df=df.apply(__get_player_data, axis='columns', args=(season,))
    if save:
        # os.path.join instead of gluing with os.path.sep: an empty save_dir now
        # means the current directory rather than the filesystem root
        csv_path = os.path.join(save_dir, f"player_data_season_{season}.csv")
        df.to_csv(csv_path, index=False)
    return df

In [None]:
# Great, with a little library we can now give this a try. If you want to test
# it you can just uncomment the code below. You should see that two
# players fail to load in this season. Don't forget to comment this out again
# before you incorporate the module in your main code.

#get_player_stats_by_season(season ='20052006', save=False)