In [33]:
import pandas as pd
import requests

#the nhl api identifies a season by its start year followed by its end year, so this is the list of seasons we want to check
#it is first used to find every team that appears in the standings of these seasons
year_list = ['20102011', '20112012', '20122013', '20132014', '20142015', '20152016', '20162017', '20172018', '20182019', '20192020', '20202021']

#one record per team per season is collected here; the unique team ids taken from these records are what-
#we later use to request the players of each team for the seasons specified above
team_stats = []

for season in year_list:

    #the timeout makes a stalled connection raise instead of hanging the notebook forever
    res = requests.get("https://statsapi.web.nhl.com/api/v1/standings?season={}".format(season), timeout=30)

    #stop with a clear http error here rather than with a KeyError on an error payload below
    res.raise_for_status()

    for division in res.json()['records']:

        for team in division['teamRecords']:

            team_stats.append({'team_name': team['team']['name'],
                               'team_id': team['team']['id'],
                               'season': season,
                               'points_percentage': team['pointsPercentage']})

#grabs each team id and saves it in a set so we know we only have the unique ids
team_id_list = {i['team_id'] for i in team_stats}

In [34]:
#now we request the roster of every team id for every season in our year list and collect-
#the nhl.com player id of each person on those rosters
#(the original notes say this id is the number at the end of the url of a player's personal career page)
player_id_list = []

for team in team_id_list:

    for season in year_list:

        #the timeout makes a stalled connection raise instead of hanging the notebook forever
        res = requests.get("https://statsapi.web.nhl.com/api/v1/teams/{}/?expand=team.roster&season={}".format(team, season), timeout=30)

        try:
            for player in res.json()['teams'][0]['roster']['roster']:
                player_id_list.append(player['person']['id'])

        except (KeyError, IndexError, TypeError, ValueError):
            #the response holds no usable roster for this team/season pair, so we skip the pair
            #(ValueError also covers a body that is not valid json)
            #only these lookup/decoding errors are ignored: a bare except would also swallow-
            #KeyboardInterrupt and hide real bugs
            continue

#this line makes sure we only have 1 unique player id for each player(set)
player_id_list = set(player_id_list)

In [35]:
#now we pass every player id to the api and request the biography and the year by year stats of that player
#goalies are skipped, and only National Hockey League lines from the seasons in year_list are kept
#each kept season is stored on the player's dict under its season id, so the later cells can build-
#the three-season windows used to predict a season from the two seasons before it
player_stats = []

#season totals that can simply be added together when a player has more than one nhl line in the same season
summed_stats = ('goals', 'assists', 'pim', 'games', 'hits', 'shots', 'plus_minus')

def _add_time_on_ice(first, second):
    """Add two time on ice strings and return the total as 'minutes:seconds'.

    assumes the api reports timeOnIce as 'minutes:seconds' (for example '1234:56') - TODO confirm
    """
    total_seconds = 0
    for time_on_ice in (first, second):
        minutes, seconds = time_on_ice.split(':')
        total_seconds += int(minutes) * 60 + int(seconds)
    return '{}:{:02d}'.format(total_seconds // 60, total_seconds % 60)

for player in player_id_list:

    #the timeout makes a stalled connection raise instead of hanging the notebook forever
    res_player = requests.get("https://statsapi.web.nhl.com/api/v1/people/{}".format(player), timeout=30)
    res_player.raise_for_status()

    #decode the body once instead of parsing the same json again for every field we read
    person = res_player.json()['people'][0]

    if person['primaryPosition']['code'] != 'G':

        data = {'name': person['firstName'] + " " + person['lastName'],
                'position': person['primaryPosition']['code'],
                'country': person['nationality'],
                'birthday': person['birthDate'],
                'id': player,
                'height': person['height'],
                'weight': person['weight']}

        res_stats = requests.get("https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=yearByYear".format(player), timeout=30)
        res_stats.raise_for_status()

        for year_stats in res_stats.json()['stats'][0]['splits']:

            if year_stats['season'] in year_list and year_stats['league']['name'] == 'National Hockey League':

                stat = year_stats['stat']
                season_line = {'goals': stat['goals'],
                               'assists': stat['assists'],
                               'pim': stat['pim'],
                               'games': stat['games'],
                               'hits': stat['hits'],
                               'shots': stat['shots'],
                               'time': stat['timeOnIce'],
                               'plus_minus': stat['plusMinus'],
                               'team': year_stats['team']['name']}

                #a season can show up more than once in the splits; storing the line directly would let the-
                #later line overwrite the earlier one and lose those games, so the lines are added together
                #NOTE(review): this looks like one line per team for a player who changed teams mid-season - confirm
                #the team kept is the one from the latest line
                earlier_line = data.get(year_stats['season'])
                if earlier_line is not None:
                    for key in summed_stats:
                        season_line[key] += earlier_line[key]
                    season_line['time'] = _add_time_on_ice(earlier_line['time'], season_line['time'])

                data[year_stats['season']] = season_line

        player_stats.append(data)

In [38]:
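#this function returns one season of a player's stats with the given index label added to the end of every key
#a season the player has no data for comes back as all zeros, so the two seasons before the predicted third season always have values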
def get_year_data(player, year, index):
    """Return the stats of one season of a player with every key suffixed by '_' + index.

    player: dict of a player, holding one stats dict per season id the player has data for
    year:   season id to look up in player
    index:  string label added to the end of every key (for example '1' or '2')

    When the player has no entry for year, the eight numeric stats are returned as 0
    (no team key is included in that case).
    """
    #no data for this season: hand back a zero for every numeric stat
    if year not in player:
        stat_names = ('goals', 'assists', 'pim', 'games', 'hits', 'shots', 'time', 'plus_minus')
        return {name + '_' + index: 0 for name in stat_names}

    #season found: copy every stat over under its suffixed key
    labelled = {}
    for key, val in player[year].items():
        labelled[key + '_' + index] = val
    return labelled

#We now slide a window of three consecutive seasons over year_list for every player
#the first two seasons of a window give the input stats (labels 1 and 2) and the third season gives ppg_3-
#(points per game), which is the value we have to compare our LSTM against
#a window is only kept when the player has data for its third season
split = []

for player in player_stats:

    #every start position that still leaves room for three seasons (9 windows for the 11 seasons in year_list)
    for i in range(len(year_list) - 2):

        year = year_list[i:i+3]

        if year[2] in player:

            target_season = player[year[2]]

            #start from the biography fields (everything that is not stored under a season id)-
            #and then add the stats of the two earlier seasons in order
            row = {j: player[j] for j in player if j not in year_list}
            row.update(get_year_data(player, year[0], '1'))
            row.update(get_year_data(player, year[1], '2'))
            row['season_1'] = year[0]
            row['season_2'] = year[1]
            row['season_3'] = year[2]

            #guard in case a season line reports zero games, which would otherwise raise ZeroDivisionError
            games = target_season['games']
            row['ppg_3'] = (target_season['goals'] + target_season['assists']) / games if games else 0.0
            split.append(row)

In [39]:
#turn the list of row dicts we collected in split into a dataframe
nhl_api_data = pd.DataFrame(data=split)

#write the dataframe to a csv so it can be read into another file
#NOTE(review): this is an absolute path on one specific machine
output_path = r'C:\Users\tdavi\Documents\Concordia Bootcamps\hockey_final_project\nhl_api_dataset.csv'
nhl_api_data.to_csv(output_path)