In [1]:
import nfl_data_py as nfl

import numpy as np
import pandas as pd
import difflib


import matplotlib.pyplot as plt
import mplcursors
%matplotlib widget

In [2]:
pd.options.plotting.backend = "plotly"

In [3]:
def compact_columns(df, num_cols = 5, col_width = 25):
    cols = list(df.columns)
    row_string = ''
    for k, c in enumerate(cols):
        row_item = f"{c} {(col_width - len(c))*' '}"
        row_string = row_string + row_item
        
        if (k+1)%num_cols==0:
            print(row_string)
            row_string = ''
                       
    return 

## Getting snap counts

Getting snap counts for every game from the past 10 seasons

In [4]:
# A quick inspection of the snap_counts data
years = list(range(2013, 2023))
df = nfl.import_snap_counts(years)
compact_columns(df)
df.query('season == 2020 and week == 21').head()


game_id                   pfr_game_id               season                    game_type                 week                      
player                    pfr_player_id             position                  team                      opponent                  
offense_snaps             offense_pct               defense_snaps             defense_pct               st_snaps                  


Unnamed: 0,game_id,pfr_game_id,season,game_type,week,player,pfr_player_id,position,team,opponent,offense_snaps,offense_pct,defense_snaps,defense_pct,st_snaps,st_pct
24908,2020_21_KC_TB,202102070tam,2020,SB,21,Nick Allegretti,AlleNi00,G,TB,KC,75.0,1.0,0.0,0.0,3.0,0.11
24909,2020_21_KC_TB,202102070tam,2020,SB,21,Andrew Wylie,WyliAn00,T,TB,KC,75.0,1.0,0.0,0.0,3.0,0.11
24910,2020_21_KC_TB,202102070tam,2020,SB,21,Stefen Wisniewski,WisnSt01,G,TB,KC,75.0,1.0,0.0,0.0,3.0,0.11
24911,2020_21_KC_TB,202102070tam,2020,SB,21,Austin Reiter,ReitAu00,C,TB,KC,75.0,1.0,0.0,0.0,3.0,0.11
24912,2020_21_KC_TB,202102070tam,2020,SB,21,Patrick Mahomes,MahoPa00,QB,TB,KC,75.0,1.0,0.0,0.0,0.0,0.0


Note: The Super Bowl teams are swapped. This error only appears in the SB and not for every season


In [5]:
years = list(range(2013, 2023))
snap_df = nfl.import_snap_counts(years)[['pfr_player_id', 'pfr_game_id', 'team', 'season', 'week', 'offense_snaps', 'offense_pct', 'defense_snaps', 'defense_pct']]
snap_df.head(50)
# snap_df = nfl.import_snap_counts(years = years)[['player', 'pfr_player_id', 'team', 'season', 'week', 'game_type', 'game_id', 'pfr_game_id', 'offense_snaps', 'offense_pct', 'defense_snaps', 'defense_pct']]
# compact_columns(snap_df)

# snap_df.query('season == 2020 and week == 21').head()

Unnamed: 0,pfr_player_id,pfr_game_id,team,season,week,offense_snaps,offense_pct,defense_snaps,defense_pct
0,FranOr00,201309050den,DEN,2013,1,71.0,1.0,0.0,0.0
1,CladRy20,201309050den,DEN,2013,1,71.0,1.0,0.0,0.0
2,BeadZa20,201309050den,DEN,2013,1,71.0,1.0,0.0,0.0
3,VasqLo20,201309050den,DEN,2013,1,71.0,1.0,0.0,0.0
4,ThomJu00,201309050den,DEN,2013,1,71.0,1.0,0.0,0.0
5,RamiMa20,201309050den,DEN,2013,1,71.0,1.0,0.0,0.0
6,MannPe00,201309050den,DEN,2013,1,71.0,1.0,0.0,0.0
7,ThomDe03,201309050den,DEN,2013,1,68.0,0.96,0.0,0.0
8,DeckEr00,201309050den,DEN,2013,1,64.0,0.9,0.0,0.0
9,WelkWe00,201309050den,DEN,2013,1,56.0,0.79,0.0,0.0


## Calculating Total Snaps Per Game

This info is not provided, but can be back calculated using individual snap counts and snap percentages. I average over individuals to minimize any aberations.

The repeated groupby.apply is silly. I wish I had a better workaround for generating multiple named columns in a grouped dataframe.

In [6]:
calcTotal = lambda count, pct: np.round(np.sum(count)/np.sum(pct))

offense_snapsPerGame_df = snap_df[['pfr_game_id', 'team', 'offense_snaps', 'offense_pct']] \
    .groupby(['pfr_game_id', 'team']) \
    .apply(func = lambda x: pd.Series({'offense_tot_snaps': calcTotal(x.offense_snaps, x.offense_pct)}))
defense_snapsPerGame_df = snap_df[['pfr_game_id', 'team','defense_snaps', 'defense_pct']] \
    .groupby(['pfr_game_id', 'team']) \
    .apply(func = lambda x: pd.Series({'defense_tot_snaps': calcTotal(x.defense_snaps, x.defense_pct)}))


In [7]:
defense_snapsPerGame_df

Unnamed: 0_level_0,Unnamed: 1_level_0,defense_tot_snaps
pfr_game_id,team,Unnamed: 2_level_1
201309050den,BAL,71.0
201309050den,DEN,89.0
201309080buf,BUF,91.0
201309080buf,NE,63.0
201309080car,CAR,65.0
...,...,...
202301290kan,KC,66.0
202301290phi,PHI,46.0
202301290phi,SF,74.0
202302120phi,KC,55.0


## Drafted players


In [None]:
drafted_df = nfl.import_draft_picks(years)[['pfr_player_id', 'pfr_player_name', 'category', 'season', 'round', 'pick']]
drafted_df.rename(columns = {'pfr_player_name':'player', 'category':'position_group'}, inplace = True)

In drafted_df, 'season' refers to the year in which the player was drafted. In snap_df, 'season' refers to the year in which the game was played. By leaving these fields named the same, upon merging we will be left with the snap counts for players in their rookie season.

In [None]:
rookieSnaps_df = drafted_df.merge(snap_df, how = 'inner', on = ['pfr_player_id', 'season'])
rookieSnaps_df.head(10)
# # Add total snaps per game
# rookieSnaps_df = rookieSnaps_df.merge(offense_snapsPerGame_df, how = 'left', on = ['pfr_game_id', 'team'])
# rookieSnaps_df = rookieSnaps_df.merge(defense_snapsPerGame_df, how = 'left', on = ['pfr_game_id', 'team'])

### Filling in "Missing" Data

The best ability is availability.

As is, if a player wasn't active on game day, there is no row for that game/player. As an example, Landon Dickerson (Philadelphia Guard) is missing the row corresponding to week 1; he was not suited for that game.

If a player is inactive, they aren't contributing. Instead, that player should be marked as taking 0 out of tot_snaps for that game.

In [None]:
player = 'Landon Dickerson'
rookieSnaps_df.query('player == @player').head(20)

In [None]:
team1 = 'PHI'
team2 = 'DAL'
season1 = 2020
season2 = 2021
df = rookieSnaps_df.query('(team == @team1 or team == @team2) and (season == @season1 or season == @season2) and (week>=1 and week<12)')

display(df.head())

df = df.pivot(\
    index = ['team', 'season', 'player', 'pfr_player_id', 'position_group', 'round', 'pick'], \
    columns = ['week']).fillna(0).stack() \
    # values = ['pfr_game_id', 'offense_snaps', 'offense_pct', 'defense_snaps', 'defense_pct']).fillna(0)
display(df.head(25))

# print(df.columns)

# df2 = df.stack()
# display(df2.head(10))

In [None]:
# The fix

# A table with the offensive and defensive snap counts for every game
allGames_df = rookieSnaps_df[['team', 'season', 'week', 'game_type', 'game_id', 'pfr_game_id', 'offense_tot_snaps', 'defense_tot_snaps']].drop_duplicates()
display(allGames_df.sort_values(by = ['season', 'week', 'team']).head(8))

allPlayers_df = rookieSnaps_df[['pfr_player_id', 'player',  'position_group', 'round', 'pick', 'team', 'season']].drop_duplicates()
display(allPlayers_df.sort_values(by = ['season', 'team']))


In [None]:
allPlayerGames_df = pd.merge(allPlayers_df, allGames_df, on = ['team', 'season'], how='outer')

In [None]:
player = 'Landon Dickerson'
allPlayerGames_df.query('player == @player').sort_values(['team', 'season', 'week']).head(5)

In [None]:
team = 'PHI'
season = 2021
df = rookieSnaps_df.query('team == @team and season == @season')

display(df.head())

df = df.pivot(index = ['team', 'season', 'player', 'round'], columns = ['week', 'pfr_game_id'], values = ['offense_snaps', 'defense_snaps'])
display(df)

In [None]:
df = pd.merge(rookieSnaps_df, allPlayerGames)

In [None]:

rookieSnaps_df = rookieSnaps_df.merge(offense_snapsPerGame_df, how = 'left', on = ['pfr_game_id', 'team'])
rookieSnaps_df = rookieSnaps_df.merge(defense_snapsPerGame_df, how = 'left', on = ['pfr_game_id', 'team'])

# Add offense defense flag
rookieSnaps_df['on_offense'] = rookieSnaps_df['position_group'].isin(['QB', 'OL', 'RB', 'TE', 'WR'])
rookieSnaps_df['on_defense'] = rookieSnaps_df['position_group'].isin(['DL', 'LB', 'DB'])

# Consolidate snaps
# Instead of each player haveing both offense and defense snaps, players have an offense/defense flag and then snaps.
# This assumes a player doesn't play both sides of the ball...which excepting few snaps a season is true.
rookieSnaps_df['snaps'] = rookieSnaps_df['on_offense'] * rookieSnaps_df['offense_snaps'] + rookieSnaps_df['on_defense'] * rookieSnaps_df['defense_snaps']
rookieSnaps_df['tot_snaps'] = rookieSnaps_df['on_offense'] * rookieSnaps_df['offense_tot_snaps'] + rookieSnaps_df['on_defense'] * rookieSnaps_df['defense_tot_snaps']
rookieSnaps_df['snap_pct'] = rookieSnaps_df['on_offense'] * rookieSnaps_df['offense_pct'] + rookieSnaps_df['on_defense'] * rookieSnaps_df['defense_pct']

# drop individual offense/defense snap columns
rookieSnaps_df.drop(['offense_snaps', 'offense_pct', 'offense_tot_snaps','defense_snaps', 'defense_pct', 'defense_tot_snaps',], axis = 'columns', inplace=True)

### Filling in "Missing" Data

The best ability is availability.

As is, if a player wasn't active on game day, there is no row for that game/player. As an example, Landon Dickerson (Philadelphia Guard) is missing the row corresponding to week 1; he was not suited for that game.

If a player is inactive, they aren't contributing. Instead, that player should be marked as taking 0 out of tot_snaps for that game.

In [None]:
allGames_df = rookieSnaps_df[['team', 'season', 'week', 'game_type', 'game_id', 'pfr_game_id', 'on_offense', 'on_defense', 'tot_snaps']].drop_duplicates()
allGames_df.sort_values(by = ['season', 'week', 'team']).head(8)

In [None]:
df = rookieSnaps_df.join(allGames_df, on = [])

## Punch list:

When players weren't active for a game, that game does not show up as a line item. It should be filled in and recorded as 0 snaps with the right number of total snaps.

## Visualization goal

### Snaps per game histogram
Histograms of snap percentage by round.
Each game contributes one datum to the histogram.


### Snaps per season histogram



### UI

Slider bars to select years
Dropdown of teams to overlay on histograms.


Columns: offense, defense
Rows: Rd 1, Rd2, Rd 3, Rd 4-7
Histograms stacked by position

In [None]:
# df = rookieSnaps_df.groupby(['player'], group_keys=True)
name1 = 'Landon Dickerson'
name2 = ''
rookieSnaps_df.query('player == @name1').head(30)


In [None]:
import plotly.express as px

game_type = 'REG'
df = rookieSnaps_df.query('on_offense and round==2 and game_type==@game_type')


fig = px.histogram(df, x = 'snap_pct', nbins = 22, color = 'position_group')
fig.show()

In [None]:


from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(
    rows=4, cols=2)

fig.add_trace(go.Scatter(x=[1, 2, 3], y=[4, 5, 6]),
              row=1, col=1)

fig.add_trace(go.Scatter(x=[20, 30, 40], y=[50, 60, 70]),
              row=1, col=2)

fig.add_trace(go.Scatter(x=[300, 400, 500], y=[600, 700, 800]),
              row=2, col=1)

fig.add_trace(go.Scatter(x=[4000, 5000, 6000], y=[7000, 8000, 9000]),
              row=2, col=2)

fig.update_layout(height=500, width=700,
                  title_text="Multiple Subplots with Titles")

fig.show()

In [None]:
name = 'Andre Dillard'
drafted_df.head(5)
drafted_df.query('player == @name')

In [None]:
defense_snapsPerGame_df.head()

In [None]:
game = '202209110det'
team = 'PHI'
season = 2022
snap_df.query('season==@season and pfr_game_id == @game and team == @team').head(20)

In [None]:
df = nfl.import_players()
compact_columns(df)
df[['entry_year', 'rookie_year']].head(25)

In [None]:
players_df = nfl.import_players()[['gsis_id', 'display_name', 'draft_club', 'position_group', 'draft_number']]
players_df.rename(columns={'display_name':'player', 'draft_club':'draft_team'}, inplace=True)

ids_df = nfl.import_ids()[['gsis_id', 'pfr_id', 'team', 'name', 'draft_year', 'draft_round']]
ids_df.rename(columns={'name':'player'}, inplace=True)


In [None]:
players_df.head()

In [None]:
ids_df.sort_values(by = 'player').head(20)

In [None]:
players_df.join(ids_df, on = 'player', how = 'left')

In [None]:
name = 'DeVonta Smith'
ids_df[['pfr_id', 'name']].query('name == @name')

In [None]:
df2 = nfl.import_players()[['player'display_name', 'first_name']]
compact_columns(df2, 2)


In [None]:
df2 = nfl.import_players()
df2[['display_name','gsis_id', 'gsis_it_id', 'smart_id', 'esb_id']].head()


In [None]:
df = pd.DataFrame({'A': 'a a b'.split(),
                   'B': [1,2,3],
                   'C': [4,6,5]})
display(df)

In [None]:
player = 'Landon Dickerson'
year = 2021
snap_df.query('player == @player and season == @year')

In [None]:
df = nfl.import_rosters([2022])
compact_columns(df)

In [None]:
team = 'PIT'
last_name = 'Pickett'
df[['team', 'player_name', 'last_name', 'player_id', 'pff_id', 'rookie_year']].query('team == @team and last_name == @last_name')

In [None]:
drafted_df = nfl.import_draft_picks([2022])['gsis_id', 'pfr_player_id', 'pfr_player_name', 'position', 'season', 'round', 'pick']
drafted_df.rename(columns = {'pfr_player_name':'player', 'pfr_player_id':'pfr_id'})
compact_columns(drafted_df)

In [None]:
draft_df = nfl.import_draft_picks([2012])
name = 'Russell Wilson'
draft_df.query('pfr_player_name == @name').team

In [None]:
draft_df['position'].unique()

In [None]:
nfl.impo