In [28]:
import pandas as pd
import numpy as np
import nfl_data_py as nfl
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
''' Import Data '''

pbp_data: pd.DataFrame = nfl.import_pbp_data([2025], downcast=True)

pbp_data = pbp_data.loc[pbp_data['season_type'] == 'REG', :]

2025 done.
Downcasting floats.


In [14]:
''' Some more fields '''

## Success
pbp_data['% ydstogo'] = pbp_data['yards_gained'] / pbp_data['ydstogo']

pbp_data['Successful Play'] = (
    ((pbp_data['down'] == 1) & (pbp_data['% ydstogo'] >= 0.4)) |
    ((pbp_data['down'] == 2) & (pbp_data['% ydstogo'] >= 0.6)) |
    (pbp_data['first_down'] == 1) |
    (pbp_data['touchdown'] == 1)
)

## Snaps
pbp_data['Snap'] = ((pbp_data['pass'] == 1) | (pbp_data['rush']) == 1 | (pbp_data['special'] == 1))

## Yard Thresholds
def distance_range(down, yds):
       
    down_s = ''
    match down:
        case 1:
            return '1st'
        case 2:
            down_s = '2nd'
        case 3:
            down_s = '3rd'
        case 4:
            down_s = '4th'
        case default:
            return ''
        
    yds_range = ''
    if yds <= 2:
        yds_range = 'Short'
    elif yds <= 6:
        yds_range = 'Medium'
    else:
        yds_range = 'Long'

    return f'{down_s} & {yds_range}'
    
pbp_data['Down & Distance'] = pbp_data.apply(lambda x: distance_range(x['down'], x['ydstogo']), axis=1)


In [15]:
''' Rushing Data '''

run_data = pbp_data.loc[pbp_data['play_type'] == 'run', :].copy()

runners = run_data.groupby('rusher_player_id').aggregate(
    Name=('rusher_player_name', 'first'),
    Games=('game_id', 'nunique'),
    Attempts=('rusher_player_id', 'size'),
    Yards=('yards_gained', 'sum'),
    TDs=('touchdown', 'sum'),
    FirstDowns=('first_down', 'sum'),
    Successes=('Successful Play', 'sum')
).reset_index()
runners['Yds / Att'] = round(runners['Yards'] / runners['Attempts'], 2)
runners['Success Rate'] = round((runners['Successes'] / runners['Attempts']) * 100, 2)
runners['1D Rate'] = round((runners['FirstDowns'] / runners['Attempts']) * 100, 2)
runners['TD Rate'] = round((runners['TDs'] / runners['Attempts']) * 100, 2)

runners = runners.sort_values(by='Yards', ascending=False).reset_index(drop=True)

top_10_runners = runners.loc[:10, 'rusher_player_id'].tolist()

print(f'Top runners: {top_10_runners}')
print(runners.head(25).to_string())

Top runners: ['00-0036223', '00-0037248', '00-0036973', '00-0038542', '00-0036997', '00-0037840', '00-0036158', '00-0037525', '00-0035261', '00-0034844', '00-0032764']
   rusher_player_id         Name  Games  Attempts  Yards  TDs  FirstDowns  Successes  Yds / Att  Success Rate  1D Rate  TD Rate
0        00-0036223     J.Taylor      3        60  338.0  3.0        13.0         33       5.63         55.00    21.67     5.00
1        00-0037248       J.Cook      3        53  284.0  4.0        15.0         33       5.36         62.26    28.30     7.55
2        00-0036973    T.Etienne      3        46  270.0  1.0         8.0         21       5.87         45.65    17.39     2.17
3        00-0038542   B.Robinson      3        47  239.0  0.0         9.0         26       5.09         55.32    19.15     0.00
4        00-0036997   J.Williams      3        43  227.0  3.0        14.0         29       5.28         67.44    32.56     6.98
5        00-0037840   K.Williams      3        55  226.0  1.0   

In [46]:
fig = px.scatter(
    data_frame=runners.loc[runners['Attempts'] >= 10],
    x='Success Rate',
    y='Yds / Att',
    hover_data=['Attempts', 'Yards'],
    text='Name'
)
fig.update_traces(textposition='top center')
fig.update_layout(title='Yds per Attempt by Success Rate<br><sup>Min. 10 Attempts</sup>', xaxis_title='Success Rate (%)')
fig.show()

In [None]:

## Group by Down & Distances
rusher_carries = run_data.groupby(['rusher_player_id', 'Down & Distance']).aggregate(
    Name=('rusher_player_name', 'first'),
    Attempts=('rusher_player_id', 'size'),
    Successes=('Successful Play', 'sum')
).reset_index()

rusher_carries = rusher_carries.merge(
    runners[['rusher_player_id', 'Attempts', 'Success Rate']].rename(columns={'Attempts': 'Total Attempts', 'Success Rate': 'Overall Success Rate'}), 
    on='rusher_player_id', how='left'
    ).sort_values(by='Total Attempts', ascending=False).reset_index(drop=True)
rusher_carries['% Carries'] = round((rusher_carries['Attempts'] / rusher_carries['Total Attempts']) * 100, 2)
rusher_carries['Success Rate'] = round((rusher_carries['Successes'] / rusher_carries['Attempts']) * 100, 2)
# print(rusher_carries.head(10).to_string())

## Pivot to wide
piv = rusher_carries.loc[(rusher_carries['rusher_player_id'].isin(top_10_runners)) 
                         & (rusher_carries['Down & Distance'] != ''),:].pivot(
    index=['rusher_player_id', 'Name'],
    columns=['Down & Distance'],
    values=['% Carries', 'Success Rate']
).swaplevel(0, 1, axis=1).sort_index(axis=1).reset_index().set_index('Name').drop(columns='rusher_player_id')
# print(piv.head().to_string())

down_distances = ['1st', '2nd & Short', '2nd & Medium', '2nd & Long', '3rd & Short', '3rd & Medium', '3rd & Long', '4th & Short', '4th & Medium']

## Heatmap
for col in ['% Carries', 'Success Rate']:
    idx = pd.IndexSlice
    sl = pd.DataFrame(piv.loc[:, idx[:, col]])
    sl.columns = sl.columns.get_level_values(0)
    sl = sl.reindex(columns=list(filter(lambda x: x in piv.columns, down_distances)))
    # print(sl.head().to_string())

    fig = px.imshow(
        sl,
        aspect="auto",
        text_auto=True,
        color_continuous_scale='Greens'
    )
    fig.update_xaxes(side="top")
    fig.update_layout(title=dict(text=f'{col} by Down & Distance', automargin=True), 
                      yaxis=dict(tickformat=".0%"))
    fig.show()


# annot_df = sl.copy() # Initialize with data, convert to string
# for r in annot_df.index:
#     for c in sl.columns:
#         annot_df.loc[r, c] = f"{piv.loc[r,(c, 'Attempts')]:,.0f}\n{piv.loc[r, (c, 'Success Rate')]:,.1f}%"

# print(annot_df.head().to_string())
# fig, ax = plt.subplots(figsize=(15,10))
# sns.heatmap(data=sl, annot=annot_df, fmt="", linewidth=0.5, cmap='crest', ax=ax, annot_kws={"size": 8, "va": "center"})
# ax.set(xlabel="", ylabel="", title='Attempts & Success Rate by Down / Distance')
# ax.xaxis.tick_top()
# ax.tick_params(axis='x', rotation=45)

Down & Distance       1st              2nd & Long              2nd & Medium              2nd & Short              3rd & Long              3rd & Medium              3rd & Short              4th & Short             
                % Carries Success Rate  % Carries Success Rate    % Carries Success Rate   % Carries Success Rate  % Carries Success Rate    % Carries Success Rate   % Carries Success Rate   % Carries Success Rate
Name                                                                                                                                                                                                                 
D.Henry             75.86        40.91       3.45         0.00        20.69        33.33         NaN          NaN        NaN          NaN          NaN          NaN         NaN          NaN         NaN          NaN
S.Barkley           67.24        46.15       8.62        20.00         8.62        80.00        8.62        60.00       5.17          0.0       


dropping on a non-lexsorted multi-index without a level parameter may impact performance.



In [None]:
# def agg_func(group):
#     print(group)

# grouped = pbp_data.loc[pbp_data['week'] == 1,:].groupby('rusher_player_name')


# print(f'Week 1 Rushers: {len(grouped):,}')
# for name, group_df in grouped:
#     print(name)
#     print(group_df.shape)
#     print(group_df.head().to_string())

rushers = pbp_data[['rusher_player_id', 'rusher_player_name']].dropna().drop_duplicates().reset_index(drop=True)

def snap_count(id):
    snaps_sl = pbp_data.loc[(pbp_data['Snap']) &
                        (~pbp_data['offense_players'].isna()) &
                      (pbp_data['offense_players'].str.contains(id)), :]
    
    return snaps_sl.shape[0]

rushers['Snaps'] = rushers['rusher_player_id'].apply(lambda x: snap_count(x))

rushers = rushers.sort_values(by='Snaps', ascending=False)

print(rushers.shape)
print(rushers.head(100).to_string())








(334, 3)
   rusher_player_id rusher_player_name Snaps
0        00-0033553           J.Conner  None
1        00-0035228           K.Murray  None
2        00-0037248             J.Cook  None
3        00-0039921           T.Benson  None
4        00-0035537         Ty.Johnson  None
5        00-0034857            J.Allen  None
6        00-0039875            R.Davis  None
7        00-0035500           G.Dortch  None
8        00-0032764            D.Henry  None
9        00-0034796          L.Jackson  None
10       00-0037197          I.Pacheco  None
11       00-0039894           X.Worthy  None
12       00-0039064          Z.Flowers  None
13       00-0034975             J.Hill  None
14       00-0033873          P.Mahomes  None
15       00-0039325           C.Steele  None
16       00-0033906           A.Kamara  None
17       00-0033357             T.Hill  None
18       00-0036555          C.Hubbard  None
19       00-0039150            B.Young  None
20       00-0035243          M.Sanders  None
2

In [80]:
print(pbp_data['offense_players'].head().to_string())

0                                                  NaN
1    00-0039807;00-0037141;00-0039864;00-0035961;00...
2    00-0034495;00-0035258;00-0034346;00-0035228;00...
3    00-0034495;00-0035258;00-0034346;00-0035228;00...
4    00-0034495;00-0035258;00-0034346;00-0035228;00...


In [90]:
''' Jonathan Taylor '''

# jt_snaps = run_data.loc[run_data['offense_players'].str.contains('00-0036223'), :].copy()
jt_runs = run_data.loc[run_data['rusher_player_id'] == '00-0036223', :].copy()

snaps = pbp_data.loc[(~pbp_data['offense_players'].isna()) & (pbp_data['offense_players'].str.contains('00-0036223')), :].shape[0]
attempts = len(jt_runs)
yards = jt_runs['yards_gained'].sum()
tds = jt_runs.loc[jt_runs['touchdown'] == 1, :].shape[0]
first_downs = jt_runs.loc[jt_runs['first_down'] == 1, :].shape[0]
successful_rushes = jt_runs.loc[jt_runs['Successful Play'], :].shape[0]

print(f'{ snaps = }')
print(f'{ attempts = }')
print(f'{ yards = }')
print(f'{ tds = }')
print(f'{ first_downs = }')
print(f'{ successful_rushes = }')
print(f'success rate: {(successful_rushes / attempts) * 100:,.2f}')

 snaps = 724
 attempts = 305
 yards = 1431.0
 tds = 11
 first_downs = 72
 successful_rushes = 139
success rate: 45.57


In [72]:
''' Receiving '''

pass_data = pbp_data.loc[pbp_data['play_type'] == 'pass', :].copy()

print(pass_data.shape)
print(pass_data['complete_pass'].sum())


(3296, 376)
2005.0


In [73]:
chase_data = pass_data.loc[pass_data['receiver_player_id'] == '00-0036900', :]

targets = len(chase_data)
receptions = chase_data['complete_pass'].sum()
yards = chase_data['yards_gained'].sum()
yac = chase_data['yards_after_catch'].sum()
# tds = chase_data['touchdown'].sum()
tds = chase_data.loc[chase_data['touchdown'] == 1, :].shape[0]
first_downs = chase_data['first_down'].sum()
successes = chase_data['Successful Play'].sum()
success_rate = successes / targets

print(f'Targets: {targets:,}')
print(f'Receptions: {receptions:,}')
print(f'Yards: {yards:,}')
print(f'YAC: {yac:,}')
print(f'TDs: {tds:,}')
print(f'1Ds: {first_downs:,}')
print(f'Successes: {successes:,}')
print(f'Success Rate: {success_rate*100:,.2f}')


Targets: 27
Receptions: 21.0
Yards: 241.0
YAC: 129.0
TDs: 1
1Ds: 12.0
Successes: 17
Success Rate: 62.96
