In [47]:
import requests
import json
import pandas as pd
import numpy as np

# Base URL of the NHL stats API; request paths are appended to it verbatim.
nhl_url = 'https://statsapi.web.nhl.com/api/v1/'

def nhl_request(path, **kwargs):
    """GET ``nhl_url + path`` and return the decoded JSON body.

    Parameters
    ----------
    path : str
        Path relative to ``nhl_url`` (e.g. ``'schedule'``).
    **kwargs
        Forwarded unchanged to ``requests.get`` (e.g. ``params=...``).
        A ``timeout`` of 30 seconds is supplied when the caller gives none,
        so a stalled connection cannot hang a long scraping loop forever.

    Returns
    -------
    The parsed JSON payload when the response status is 200, otherwise
    ``None``. Callers that subscript the result must be prepared for ``None``.
    """
    kwargs.setdefault('timeout', 30)
    res = requests.get(nhl_url+path, **kwargs)
    if res.status_code == 200:
        return res.json()
    # Non-200 responses are reported as None (the original did this implicitly).
    return None

In [48]:
# Query parameters for the 2018-2019 schedule request (gameType 'R').
params = dict(season='20182019', gameType='R')
response = nhl_request('schedule', params=params)

In [49]:
# Walk the schedule response: remember each date string and the gamePk of
# every game listed under that date.
dateArray = []
gameIDArray = []
for dateObj in response['dates']:
    dateArray.append(dateObj['date'])
    gameIDArray.extend(gameObj['gamePk'] for gameObj in dateObj['games'])

In [50]:
def getSeasonGameIDs(season):
    """Return the ``gamePk`` of every game in the schedule for ``season``.

    The schedule is requested with ``gameType='R'`` (presumably regular-season
    games -- confirm against the API documentation).

    Parameters
    ----------
    season : str or int
        Season identifier passed through as the ``season`` query parameter,
        e.g. ``'20182019'`` or ``20182019``.

    Returns
    -------
    list
        Game IDs in the order the schedule lists them (by date, then by game).

    Notes
    -----
    Unlike the previous version, this function no longer appends to the
    notebook-global ``dateArray``; it depends only on its argument, so it
    works on a fresh kernel and does not grow global state on every call.
    ``nhl_request`` returns ``None`` for non-200 responses, in which case the
    subscript below raises ``TypeError``.
    """
    params = {'season': season, 'gameType': 'R'}
    response = nhl_request('schedule', params=params)
    return [
        gameObj['gamePk']
        for dateObj in response['dates']
        for gameObj in dateObj['games']
    ]

In [51]:
def _gameEventRow(play, gameFields, maxPlayers=4):
    """Flatten one play of the live feed into a single flat dict (one row).

    ``gameFields`` holds the per-game columns and is copied first so they
    lead the row. Optional fields that the play does not carry are stored as
    ``''``. Only the first ``maxPlayers`` entries of ``play['players']`` are
    kept, as ``player<N>ID`` / ``player<N>FullName`` / ``player<N>Type``.
    """
    result = play['result']
    about = play['about']

    row = dict(gameFields)
    row["event"] = result['event']
    row["eventTypeId"] = result['eventTypeId']
    row["eventIDx"] = about['eventIdx']
    row["eventID"] = about['eventId']
    # These three keys are absent on many plays.
    row["secondaryType"] = result.get('secondaryType', '')
    row["penaltySeverity"] = result.get('penaltySeverity', '')
    row["penaltyMinutes"] = result.get('penaltyMinutes', '')
    row["period"] = about['period']
    row["periodTime"] = about['periodTime']

    # Slots beyond the players actually present stay blank; an empty
    # 'players' list therefore yields four blank slots instead of an error.
    players = play.get('players', [])
    for slot in range(1, maxPlayers + 1):
        if slot <= len(players):
            entry = players[slot - 1]
            playerID = entry['player']['id']
            fullName = entry['player']['fullName']
            playerType = entry['playerType']
        else:
            playerID = fullName = playerType = ''
        row[f"player{slot}ID"] = playerID
        row[f"player{slot}FullName"] = fullName
        row[f"player{slot}Type"] = playerType

    hasTeam = 'team' in play
    row["teamID"] = play['team']['id'] if hasTeam else ''
    row["teamName"] = play['team']['name'] if hasTeam else ''
    return row


def getGameData(gameID):
    """Fetch the live feed of one game and return every play as a flat dict.

    Parameters
    ----------
    gameID : int or str
        Game identifier interpolated into ``game/{gameID}/feed/live``.

    Returns
    -------
    list of dict
        One dict per entry of ``liveData.plays.allPlays``, in feed order.
        Keys, in order: gameId, homeTeam, homeTeamId, awayTeam, awayTeamId,
        event, eventTypeId, eventIDx, eventID, secondaryType,
        penaltySeverity, penaltyMinutes, period, periodTime,
        player1..player4 (ID, FullName, Type), teamID, teamName.

    Notes
    -----
    ``nhl_request`` returns ``None`` for non-200 responses, in which case the
    first subscript below raises ``TypeError``.
    """
    gameRes = nhl_request(f'game/{gameID}/feed/live')
    teams = gameRes['gameData']['teams']
    gameFields = {
        "gameId": gameID,
        "homeTeam": teams['home']['name'],
        "homeTeamId": teams['home']['id'],
        "awayTeam": teams['away']['name'],
        "awayTeamId": teams['away']['id'],
    }
    return [
        _gameEventRow(play, gameFields)
        for play in gameRes['liveData']['plays']['allPlays']
    ]
    

In [52]:
def _fightGameEventRow(play, gameFields, maxPlayers=4):
    """Flatten one play of the live feed into a single flat dict (one row).

    Produces the same columns, in the same order, as the rows built by
    ``getGameData``. ``gameFields`` holds the per-game columns and leads the
    row; optional fields missing from the play are stored as ``''``; only
    the first ``maxPlayers`` entries of ``play['players']`` are kept.
    """
    result = play['result']
    about = play['about']

    row = dict(gameFields)
    row["event"] = result['event']
    row["eventTypeId"] = result['eventTypeId']
    row["eventIDx"] = about['eventIdx']
    row["eventID"] = about['eventId']
    row["secondaryType"] = result.get('secondaryType', '')
    row["penaltySeverity"] = result.get('penaltySeverity', '')
    row["penaltyMinutes"] = result.get('penaltyMinutes', '')
    row["period"] = about['period']
    row["periodTime"] = about['periodTime']

    # Slots beyond the players actually present stay blank.
    players = play.get('players', [])
    for slot in range(1, maxPlayers + 1):
        if slot <= len(players):
            entry = players[slot - 1]
            playerID = entry['player']['id']
            fullName = entry['player']['fullName']
            playerType = entry['playerType']
        else:
            playerID = fullName = playerType = ''
        row[f"player{slot}ID"] = playerID
        row[f"player{slot}FullName"] = fullName
        row[f"player{slot}Type"] = playerType

    hasTeam = 'team' in play
    row["teamID"] = play['team']['id'] if hasTeam else ''
    row["teamName"] = play['team']['name'] if hasTeam else ''
    return row


def getFightGameData(gameID):
    """Return every play of a game, but only if the game contained a fight.

    A game counts as having a fight when at least one play referenced by
    ``liveData.plays.penaltyPlays`` (indices into ``allPlays``) has
    ``result.secondaryType == 'Fighting'``.

    Parameters
    ----------
    gameID : int or str
        Game identifier interpolated into ``game/{gameID}/feed/live``.

    Returns
    -------
    list of dict
        ``[]`` when the game has no fight; otherwise one flat dict per entry
        of ``allPlays`` (all events, not just the fight), in feed order.

    Notes
    -----
    Prints ``Found a fight in game <gameID>`` for games with a fight.
    A penalty play without a ``secondaryType`` key is treated as "not a
    fight"; previously it raised ``KeyError`` and aborted the whole run.
    ``nhl_request`` returns ``None`` for non-200 responses, in which case the
    first subscript below raises ``TypeError``.
    """
    gameRes = nhl_request(f'game/{gameID}/feed/live')
    plays = gameRes['liveData']['plays']
    allPlays = plays['allPlays']

    # .get(): secondaryType is optional on a play's result.
    hasFight = any(
        allPlays[penaltyPlay]['result'].get('secondaryType') == 'Fighting'
        for penaltyPlay in plays['penaltyPlays']
    )
    if not hasFight:
        return []

    print(f'Found a fight in game {gameID}')
    teams = gameRes['gameData']['teams']
    gameFields = {
        "gameId": gameID,
        "homeTeam": teams['home']['name'],
        "homeTeamId": teams['home']['id'],
        "awayTeam": teams['away']['name'],
        "awayTeamId": teams['away']['id'],
    }
    return [_fightGameEventRow(play, gameFields) for play in allPlays]
    

In [53]:
def getFightGameDataCSV(season):
    """Collect the events of every fight game in ``season`` and save them as CSV.

    Looks up the season's game IDs with ``getSeasonGameIDs``, gathers the rows
    returned by ``getFightGameData`` for each game (empty for games without a
    fight), and writes them to ``AllFightGameData-{season}.csv`` in the current
    working directory, without the index column.

    Parameters
    ----------
    season : str or int
        Season identifier, e.g. ``20172018``; also used in the file name.

    Returns
    -------
    None
        The result is the CSV file; a completion message is printed.
    """
    data = []
    for gameID in getSeasonGameIDs(season):
        # extend() grows the list in place; `data = data + ...` re-copied
        # everything collected so far on every game.
        data.extend(getFightGameData(gameID))
    df = pd.DataFrame(data)
    df.to_csv(f'AllFightGameData-{season}.csv', index=False)
    print(f'Data gathering for {season} complete')

In [54]:
# 2017-2018 season: gather the events of every game with a fight and write
# them to AllFightGameData-20172018.csv (file name built by getFightGameDataCSV).
getFightGameDataCSV(20172018)

Found a fight in game 2017020003
Found a fight in game 2017020005
Found a fight in game 2017020013
Found a fight in game 2017020018
Found a fight in game 2017020021
Found a fight in game 2017020025
Found a fight in game 2017020028
Found a fight in game 2017020034
Found a fight in game 2017020038
Found a fight in game 2017020046
Found a fight in game 2017020048
Found a fight in game 2017020052
Found a fight in game 2017020053
Found a fight in game 2017020054
Found a fight in game 2017020056
Found a fight in game 2017020058
Found a fight in game 2017020061
Found a fight in game 2017020062
Found a fight in game 2017020069
Found a fight in game 2017020073
Found a fight in game 2017020083
Found a fight in game 2017020085
Found a fight in game 2017020088
Found a fight in game 2017020095
Found a fight in game 2017020123
Found a fight in game 2017020127
Found a fight in game 2017020128
Found a fight in game 2017020140
Found a fight in game 2017020145
Found a fight in game 2017020147
Found a fi

In [55]:
# 2016-2017 season: gather the events of every game with a fight and write
# them to AllFightGameData-20162017.csv (file name built by getFightGameDataCSV).
getFightGameDataCSV(20162017)

Found a fight in game 2016020001
Found a fight in game 2016020003
Found a fight in game 2016020009
Found a fight in game 2016020007
Found a fight in game 2016020014
Found a fight in game 2016020017
Found a fight in game 2016020024
Found a fight in game 2016020035
Found a fight in game 2016020037
Found a fight in game 2016020039
Found a fight in game 2016020055
Found a fight in game 2016020058
Found a fight in game 2016020064
Found a fight in game 2016020068
Found a fight in game 2016020074
Found a fight in game 2016020076
Found a fight in game 2016020083
Found a fight in game 2016020093
Found a fight in game 2016020099
Found a fight in game 2016020102
Found a fight in game 2016020105
Found a fight in game 2016020111
Found a fight in game 2016020112
Found a fight in game 2016020116
Found a fight in game 2016020129
Found a fight in game 2016020133
Found a fight in game 2016020131
Found a fight in game 2016020135
Found a fight in game 2016020136
Found a fight in game 2016020140
Found a fi

In [56]:
# 2015-2016 season: gather the events of every game with a fight and write
# them to AllFightGameData-20152016.csv (file name built by getFightGameDataCSV).
getFightGameDataCSV(20152016)

Found a fight in game 2015020003
Found a fight in game 2015020004
Found a fight in game 2015020008
Found a fight in game 2015020020
Found a fight in game 2015020027
Found a fight in game 2015020034
Found a fight in game 2015020036
Found a fight in game 2015020045
Found a fight in game 2015020046
Found a fight in game 2015020049
Found a fight in game 2015020051
Found a fight in game 2015020062
Found a fight in game 2015020066
Found a fight in game 2015020072
Found a fight in game 2015020073
Found a fight in game 2015020074
Found a fight in game 2015020075
Found a fight in game 2015020079
Found a fight in game 2015020081
Found a fight in game 2015020084
Found a fight in game 2015020085
Found a fight in game 2015020094
Found a fight in game 2015020101
Found a fight in game 2015020103
Found a fight in game 2015020113
Found a fight in game 2015020115
Found a fight in game 2015020120
Found a fight in game 2015020123
Found a fight in game 2015020132
Found a fight in game 2015020141
Found a fi

In [20]:
# Accumulate the event rows of every game in gameIDArray that had a fight
# (getFightGameData returns an empty list for the others).
data = []
for gameID in gameIDArray:
    data.extend(getFightGameData(gameID))

game 2018020001 has no fights
Found a fight in game 2018020002
Found a fight in game 2018020003
game 2018020004 has no fights
game 2018020005 has no fights
game 2018020007 has no fights
game 2018020008 has no fights
game 2018020006 has no fights
game 2018020009 has no fights
game 2018020010 has no fights
game 2018020011 has no fights
game 2018020012 has no fights
game 2018020013 has no fights
game 2018020014 has no fights
game 2018020015 has no fights
game 2018020016 has no fights
game 2018020020 has no fights
Found a fight in game 2018020017
game 2018020018 has no fights
game 2018020019 has no fights
game 2018020021 has no fights
game 2018020022 has no fights
Found a fight in game 2018020023
game 2018020024 has no fights
Found a fight in game 2018020025
game 2018020026 has no fights
game 2018020027 has no fights
game 2018020028 has no fights
game 2018020029 has no fights
game 2018020030 has no fights
Found a fight in game 2018020031
game 2018020032 has no fights
game 2018020033 has no

In [21]:
# Turn the collected list of event dicts into a table and save it,
# leaving the DataFrame index out of the CSV.
df = pd.DataFrame(data)
df.to_csv('AllFightGameData-1819.csv', index=False)

In [26]:
# Accumulate the event rows of every game in gameIDArray (fight or not),
# logging each game as it finishes.
data = []
for gameID in gameIDArray:
    data.extend(getGameData(gameID))
    print(f'done parsing for game: {gameID}')

done parsing for game: 2018020001
done parsing for game: 2018020002
done parsing for game: 2018020003
done parsing for game: 2018020004
done parsing for game: 2018020005
done parsing for game: 2018020007
done parsing for game: 2018020008
done parsing for game: 2018020006
done parsing for game: 2018020009
done parsing for game: 2018020010
done parsing for game: 2018020011
done parsing for game: 2018020012
done parsing for game: 2018020013
done parsing for game: 2018020014
done parsing for game: 2018020015
done parsing for game: 2018020016
done parsing for game: 2018020020
done parsing for game: 2018020017
done parsing for game: 2018020018
done parsing for game: 2018020019
done parsing for game: 2018020021
done parsing for game: 2018020022
done parsing for game: 2018020023
done parsing for game: 2018020024
done parsing for game: 2018020025
done parsing for game: 2018020026
done parsing for game: 2018020027
done parsing for game: 2018020028
done parsing for game: 2018020029
done parsing f

In [27]:
# Turn the collected list of event dicts into a table and save it,
# leaving the DataFrame index out of the CSV.
df = pd.DataFrame(data)
df.to_csv('AllGameData.csv', index=False)

In [28]:
# Preview the first rows of the event table as a sanity check.
df.head()

Unnamed: 0,gameId,homeTeam,homeTeamId,awayTeam,awayTeamId,event,eventTypeId,eventIDx,eventID,secondaryType,...,player2FullName,player2Type,player3ID,player3FullName,player3Type,player4ID,player4FullName,player4Type,teamID,teamName
0,2018020001,Toronto Maple Leafs,10,Montréal Canadiens,8,Game Scheduled,GAME_SCHEDULED,0,1,,...,,,,,,,,,,
1,2018020001,Toronto Maple Leafs,10,Montréal Canadiens,8,Period Ready,PERIOD_READY,1,5,,...,,,,,,,,,,
2,2018020001,Toronto Maple Leafs,10,Montréal Canadiens,8,Period Start,PERIOD_START,2,8,,...,,,,,,,,,,
3,2018020001,Toronto Maple Leafs,10,Montréal Canadiens,8,Faceoff,FACEOFF,3,9,,...,Auston Matthews,Loser,,,,,,,8.0,Montréal Canadiens
4,2018020001,Toronto Maple Leafs,10,Montréal Canadiens,8,Shot,SHOT,4,10,Backhand,...,Frederik Andersen,Goalie,,,,,,,8.0,Montréal Canadiens


In [27]:
# Season 20192020: look up the game IDs, then accumulate the event rows of
# every game that had a fight.
games20192020 = getSeasonGameIDs(20192020)
data20192020 = []
for gameID in games20192020:
    data20192020.extend(getFightGameData(gameID))

game 2019020001 has no fights
game 2019020002 has no fights
game 2019020003 has no fights
game 2019020004 has no fights
game 2019020005 has no fights
game 2019020006 has no fights
game 2019020007 has no fights
game 2019020008 has no fights
game 2019020009 has no fights
game 2019020010 has no fights
Found a fight in game 2019020011
game 2019020012 has no fights
game 2019020016 has no fights
game 2019020013 has no fights
game 2019020014 has no fights
game 2019020015 has no fights
game 2019020017 has no fights
game 2019020018 has no fights
game 2019020019 has no fights
game 2019020020 has no fights
game 2019020021 has no fights
Found a fight in game 2019020022
game 2019020023 has no fights
game 2019020024 has no fights
game 2019020025 has no fights
game 2019020026 has no fights
game 2019020027 has no fights
game 2019020028 has no fights
game 2019020029 has no fights
Found a fight in game 2019020030
game 2019020031 has no fights
game 2019020032 has no fights
game 2019020033 has no fights
g

In [28]:
# Tabulate the 20192020 fight-game rows and save them, leaving the
# DataFrame index out of the CSV.
df = pd.DataFrame(data20192020)
df.to_csv('AllFightGameData-1920.csv', index=False)

In [30]:
# Season 20202021: look up the game IDs, then accumulate the event rows of
# every game that had a fight.
games20202021 = getSeasonGameIDs(20202021)
data20202021 = []
for gameID in games20202021:
    data20202021.extend(getFightGameData(gameID))

Found a fight in game 2020020003
Found a fight in game 2020020006
Found a fight in game 2020020010
Found a fight in game 2020020023
Found a fight in game 2020020038
Found a fight in game 2020020063
Found a fight in game 2020020064
Found a fight in game 2020020080
Found a fight in game 2020020079
Found a fight in game 2020020092
Found a fight in game 2020020106
Found a fight in game 2020020115
Found a fight in game 2020020118
Found a fight in game 2020020117
Found a fight in game 2020020129
Found a fight in game 2020020135
Found a fight in game 2020020144
Found a fight in game 2020020145
Found a fight in game 2020020158
Found a fight in game 2020020161
Found a fight in game 2020020164
Found a fight in game 2020020170
Found a fight in game 2020020171
Found a fight in game 2020020174
Found a fight in game 2020020550
Found a fight in game 2020020189
Found a fight in game 2020020190
Found a fight in game 2020020193
Found a fight in game 2020020199
Found a fight in game 2020020198
Found a fi

In [31]:
# Tabulate the 20202021 fight-game rows and save them, leaving the
# DataFrame index out of the CSV.
df = pd.DataFrame(data20202021)
df.to_csv('AllFightGameData-2021.csv', index=False)