<a href="https://colab.research.google.com/github/asonty/ngs_highlights/blob/master/ngs_players_on_field.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import requests
import pandas as pd

In [24]:
# Headers attached to every request this notebook sends to nextgenstats.nfl.com.
# They imitate a desktop Chrome session browsing the NGS "live" page.
# NOTE(review): presumably the API rejects requests without browser-like
# headers; the purpose of `x-override-env` is not documented here — confirm.
ngs_headers = {
    # content negotiation
    'accept': 'application/json, text/plain, */*',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US,en;q=0.9',
    # browser identity
    'referer': 'https://nextgenstats.nfl.com/live',
    'user-agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.141 Safari/537.36'
    ),
    'x-override-env': 'true',
}

### schedules

In [25]:
# Seasons to fetch. 2018 and 2019 also work, but players' nflIds are far less
# complete for those seasons.
seasons = [2020, 2021]
season_schedule_url = 'https://nextgenstats.nfl.com/api/league/schedule?season={}'

# Flat list with one dict per scheduled game, across all requested seasons.
seasons_schedules = []
for season in seasons:
  # requests has no default timeout, so bound the call to avoid hanging the kernel
  response = requests.get(season_schedule_url.format(season), headers=ngs_headers, timeout=30)
  # fail loudly on an HTTP error instead of trying to parse an error page as JSON
  response.raise_for_status()
  seasons_schedules.extend(response.json())

In [26]:
# Total number of games across all fetched seasons (the schedule list is flat).
len(seasons_schedules)

591

In [27]:
# Inspect the fields available on a single schedule entry.
seasons_schedules[0].keys()

dict_keys(['gameKey', 'gameDate', 'gameId', 'gameTimeEastern', 'gameType', 'homeDisplayName', 'homeNickname', 'homeTeam', 'homeTeamAbbr', 'homeTeamId', 'isoTime', 'networkChannel', 'ngsGame', 'season', 'seasonType', 'site', 'smartId', 'visitorDisplayName', 'visitorNickname', 'visitorTeam', 'visitorTeamAbbr', 'visitorTeamId', 'week', 'weekNameAbbr', 'score', 'validated', 'releasedToClubs'])

In [28]:
# Tabulate the schedule entries (one row per game), keeping only the
# season/game/team identifier columns.
df_seasons_games = pd.DataFrame.from_dict(seasons_schedules)[
  [
    'ngsGame', 'season', 'seasonType',
    'gameDate', 'gameId', 'gameType',
    'visitorTeamAbbr', 'visitorTeamId',
    'homeTeamAbbr', 'homeTeamId',
  ]
]

In [95]:
# Persist the schedule table. The file name is derived from `seasons` so it
# always reflects the seasons that were requested; with seasons = [2020, 2021]
# this is 'nfl_seasons_games_2020_2021.csv'.
seasons_games_csv = 'nfl_seasons_games_{}.csv'.format('_'.join(str(season) for season in seasons))
df_seasons_games.to_csv(seasons_games_csv)

In [96]:
# Preview the first five postseason games in the schedule table.
df_seasons_games[df_seasons_games['seasonType'].eq('POST')].head(5)

Unnamed: 0,ngsGame,season,seasonType,gameDate,gameId,gameType,visitorTeamAbbr,visitorTeamId,homeTeamAbbr,homeTeamId
256,True,2020,POST,01/10/2021,2021011002,POST,CLE,1050,PIT,3900
257,True,2020,POST,01/10/2021,2021011001,POST,CHI,810,NO,3300
258,True,2020,POST,01/10/2021,2021011000,POST,BAL,325,TEN,2100
259,True,2020,POST,01/09/2021,2021010902,POST,TB,4900,WAS,5110
260,True,2020,POST,01/09/2021,2021010901,POST,LA,2510,SEA,4600


### players

In [31]:
players_url = 'https://nextgenstats.nfl.com/api/plays/highlight/players'

# requests has no default timeout, so bound the call to avoid hanging the kernel
players_response = requests.get(players_url, headers=ngs_headers, timeout=30)
# fail loudly on an HTTP error instead of trying to parse an error page as JSON
players_response.raise_for_status()

# List of player records (one dict per record) from the response's 'players' field.
players = players_response.json()['players']

In [32]:
# Number of player records returned by the players endpoint.
len(players)

4591

In [33]:
# Inspect the fields available on a single player record.
players[0].keys()

dict_keys(['season', 'seasonType', 'week', 'birthDate', 'collegeConference', 'collegeName', 'currentTeamId', 'displayName', 'draftClub', 'draftNumber', 'entryYear', 'esbId', 'firstName', 'footballName', 'gsisId', 'gsisItId', 'headshot', 'height', 'jerseyNumber', 'lastName', 'position', 'positionGroup', 'rookieYear', 'shortName', 'status', 'statusDescriptionAbbr', 'statusShortDescription', 'suffix', 'teamAbbr', 'weight', 'uniformNumber', 'ngsPosition', 'ngsPositionGroup'])

In [58]:
# Player lookup table keyed by gsisItId:
#   1. drop records that have no gsisItId,
#   2. keep only the last record for each gsisItId,
#   3. retain just the name, position and id columns.
df_players = (
  pd.DataFrame.from_dict(players)
  .dropna(axis=0, subset=['gsisItId'])
  .drop_duplicates(subset='gsisItId', keep='last')
)[['displayName', 'position', 'positionGroup', 'gsisId', 'gsisItId', 'esbId']]

In [62]:
# Preview the de-duplicated player lookup table.
df_players.head()

Unnamed: 0,displayName,position,positionGroup,gsisId,gsisItId,esbId
2,Jonathan Stewart,RB,RB,00-0026153,33094.0,STE770966
3,Eli Manning,QB,QB,00-0022803,28953.0,MAN473170
5,Michael Crabtree,WR,WR,00-0026986,34461.0,CRA111040
8,Jason McCourty,CB,DB,00-0027136,34654.0,MCC600480
10,James Develin,FB,RB,00-0027925,36473.0,DEV145356


In [112]:
# Count how many players in the lookup table are listed at each position.
df_players.groupby(['position']).size()

position
CB     103
DB       2
DE      90
DT      44
FB       9
FS      43
G        1
HB       1
ILB     38
K        2
LB       4
MLB      9
NT      13
OLB     81
OT       1
P        3
QB      86
RB     122
S        1
SS      38
T        9
TE      94
WR     230
dtype: int64

### games/plays

Only a few games are scraped here, but the same approach should work for every game in the schedule.

In [97]:
game_url = 'https://nextgenstats.nfl.com/api/live/plays/playlist/game?gameId={}'

# Only the first five postseason games are scraped here.
postseason_game_ids = df_seasons_games.loc[df_seasons_games['seasonType'] == 'POST', 'gameId'].head(5)

# Flat list with one dict per play, across all scraped games.
games_plays = []
for game_id in postseason_game_ids:
  # requests has no default timeout, so bound the call to avoid hanging the kernel
  response = requests.get(game_url.format(game_id), headers=ngs_headers, timeout=30)
  # fail loudly on an HTTP error instead of trying to parse an error page as JSON
  response.raise_for_status()
  # Keep only plays that list at least one on-field player id. `.get` also
  # skips plays whose 'nflIds' is missing or null instead of raising.
  games_plays.extend(play for play in response.json()['plays'] if play.get('nflIds'))

In [98]:
# Flatten each play dict into one row; nested dicts become dot-separated
# columns (e.g. `offense.personnel`, `defense.personnel`).
df_games_plays = pd.json_normalize(games_plays)

In [105]:
# Preview the first three plays.
df_games_plays.head(3)

Unnamed: 0,gameId,playId,sequence,down,homeScore,isBigPlay,isSTPlay,isScoring,playDescription,playState,playStats,playType,possessionTeam,possessionTeamId,quarter,season,seasonType,visitorScore,week,yardlineNumber,yardlineSide,yardsToGo,isRedzonePlay,endGameClock,startGameClock,nflIds,isMarkerPlay,offense.offenseFormation,offense.personnel,defense.defendersInTheBox,defense.personnel,defense.numberOfPassRushers
0,2021011002,40,40,0,0,False,True,False,C.Parkey kicks 64 yards from CLV 35 to PIT 1. ...,APPROVED,"[{'playId': 40, 'clubCode': 'CLV', 'playerName...",play_type_kickoff,CLE,1050,1,2020,POST,0,18,35,CLE,0,False,14:56,15:00,"[46684, 48136, 46256, 40476, 43303, 41688, 434...",False,,,,,
1,2021011002,62,62,1,0,False,False,True,(14:54) (Shotgun) B.Roethlisberger Aborted. M....,APPROVED,"[{'playId': 62, 'clubCode': 'PIT', 'playerName...",play_type_rush,PIT,3900,1,2020,POST,6,18,22,PIT,10,False,14:48,14:54,"[44874, 37093, 43398, 47863, 41959, 43303, 399...",False,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"4 DL, 2 LB, 5 DB",
2,2021011002,89,89,0,0,False,True,True,"C.Parkey extra point is GOOD, Center-C.Hughlet...",APPROVED,"[{'playId': 89, 'clubCode': 'CLV', 'playerName...",play_type_xp,CLE,1050,1,2020,POST,7,18,15,PIT,0,False,14:46,14:46,"[48485, 45798, 48139, 40070, 48136, 44906, 432...",False,,,,,


This DataFrame contains a few interesting columns that may not be publicly available elsewhere:
 - `offense.offenseFormation`
 - `offense.personnel`
 - `defense.defendersInTheBox`
 - `defense.personnel`
 - `defense.numberOfPassRushers`
 - `nflIds` (we'll get to this)

### players on the field for a play

In [113]:
# One row per (game, play, on-field player id): each play's `nflIds` list is
# exploded into separate rows, then player details are attached wherever the id
# matches a `gsisItId` in df_players. The left join keeps ids with no match.
df_games_plays_players = pd.merge(
  df_games_plays[['gameId', 'playId', 'nflIds']].explode('nflIds'),
  df_players,
  how='left',
  left_on='nflIds',
  right_on='gsisItId',
)

In [116]:
# Washington's scoring big pass plays in game 2021010902.
df_games_plays[
  df_games_plays['gameId'].eq(2021010902)
  & df_games_plays['possessionTeam'].eq('WAS')
  & df_games_plays['isBigPlay'].eq(True)
  & df_games_plays['isScoring'].eq(True)
  & df_games_plays['playType'].eq('play_type_pass')
]

Unnamed: 0,gameId,playId,sequence,down,homeScore,isBigPlay,isSTPlay,isScoring,playDescription,playState,playStats,playType,possessionTeam,possessionTeamId,quarter,season,seasonType,visitorScore,week,yardlineNumber,yardlineSide,yardsToGo,isRedzonePlay,endGameClock,startGameClock,nflIds,isMarkerPlay,offense.offenseFormation,offense.personnel,defense.defendersInTheBox,defense.personnel,defense.numberOfPassRushers
647,2021010902,3912,3912,3,22,True,False,True,(4:57) (Shotgun) T.Heinicke pass short left to...,APPROVED,"[{'playId': 3912, 'clubCode': 'WAS', 'statId':...",play_type_pass,WAS,5110,4,2020,POST,28,18,11,TB,10,True,4:53,4:57,"[46186, 42348, 41295, 46132, 52453, 41563, 419...",False,EMPTY,"1 RB, 1 TE, 3 WR",5.0,"2 DL, 4 LB, 5 DB",4.0


In [117]:
# Players on the field for play 3912 of game 2021010902.
df_games_plays_players[
  df_games_plays_players['gameId'].eq(2021010902)
  & df_games_plays_players['playId'].eq(3912)
]

Unnamed: 0,gameId,playId,nflIds,displayName,position,positionGroup,gsisId,gsisItId,esbId
14234,2021010902,3912,46186,,,,,,
14235,2021010902,3912,42348,,,,,,
14236,2021010902,3912,41295,,,,,,
14237,2021010902,3912,46132,Carlton Davis,CB,DB,00-0034778,46132.0,DAV220184
14238,2021010902,3912,52453,Antoine Winfield,FS,DB,00-0036411,52453.0,WIN159348
14239,2021010902,3912,41563,,,,,,
14240,2021010902,3912,41915,Shaquil Barrett,OLB,LB,00-0031101,41915.0,BAR645290
14241,2021010902,3912,46362,Cam Sims,WR,WR,00-0034104,46362.0,SIM667389
14242,2021010902,3912,47822,,,,,,
14243,2021010902,3912,35454,Jason Pierre-Paul,DE,DL,00-0027867,35454.0,PIE587019
