In [1]:
import pandas as pd
import seaborn as sns
import nflfastpy as nfl
import numpy as np
from IPython.display import HTML

In [2]:
df = nfl.load_pbp_data(2021)

df.shape

SeasonNotFoundError: Play by play data is only available from 1999 to 2020

In [3]:
df.to_csv('pbp.csv')

In [31]:
player_df = pd.concat([
    df[['receiver_player_id','posteam', 'receiver_player_name','game_id']].rename(
    columns={'receiver_player_id': 'player_id','receiver_player_name': 'player_name'}),
    df[['rusher_player_id','posteam', 'rusher_player_name']].rename(
    columns={'rusher_player_id': 'player_id','rusher_player_name': 'player_name'}),
    df[['passer_player_id','posteam', 'passer_player_name']].rename(
    columns={'passer_player_id': 'player_id','passer_player_name': 'player_name'})
]).sort_values(by='game_id', ascending=False).groupby(['player_id'])[['posteam','player_name']].first()

player_df.head()

Unnamed: 0_level_0,posteam,player_name
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1
00-0019596,TB,T.Brady
00-0020531,NO,D.Brees
00-0022127,LV,J.Witten
00-0022787,ATL,M.Schaub
00-0022824,ARI,A.Lee


In [32]:
since_week = '1'

In [33]:
df['Passes']=1
df['Passes_'+since_week]= df.apply(lambda x : 1 if x['week'] >= int(since_week) else 0, axis=1)
df['Passes_Redzone'] = df.apply(lambda x : 1 if x['yardline_100'] <= 15 else 0, axis=1)
df['Passes_Redzone_'+since_week] = df.apply(lambda x : 1 if x['yardline_100'] <= 15 and x['week'] >= int(since_week) else 0, axis=1)

rec_df = df.groupby(['receiver_player_id', 'game_id'], as_index=False)[['Passes','Passes_'+since_week, 'Passes_Redzone', 'Passes_Redzone_'+since_week]].sum()

rec_df.head()

Unnamed: 0,receiver_player_id,game_id,Passes,Passes_1,Passes_Redzone,Passes_Redzone_1
0,00-0022127,2020_01_LV_CAR,1,1,1,1
1,00-0022127,2020_02_NO_LV,1,1,0,0
2,00-0022127,2020_04_BUF_LV,2,2,1,1
3,00-0022127,2020_05_LV_KC,2,2,0,0
4,00-0022127,2020_07_TB_LV,1,1,0,0


In [34]:
df['Rushes']=1
df['Rushes_'+since_week] = df.apply(lambda x : 1 if x['week'] >= int(since_week) else 0, axis=1)
df['Rushes_Redzone'] = df.apply(lambda x : 1 if x['yardline_100'] <= 15 else 0, axis=1)
df['Rushes_Redzone_'+since_week] = df.apply(lambda x : 1 if x['yardline_100'] <= 15 and x['week'] >= int(since_week) else 0, axis=1)

rush_df = df.groupby(['rusher_player_id', 'game_id'], as_index=False)[['Rushes', 'Rushes_'+since_week, 'Rushes_Redzone', 'Rushes_Redzone_'+since_week]].sum()

rush_df.head()

Unnamed: 0,rusher_player_id,game_id,Rushes,Rushes_1,Rushes_Redzone,Rushes_Redzone_1
0,00-0019596,2020_01_TB_NO,3,3,1,1
1,00-0019596,2020_02_CAR_TB,1,1,0,0
2,00-0019596,2020_03_TB_DEN,5,5,0,0
3,00-0019596,2020_04_LAC_TB,3,3,0,0
4,00-0019596,2020_05_TB_CHI,3,3,1,1


In [35]:
merged_df = pd.merge(rush_df,rec_df,how='outer',left_on=['rusher_player_id', 'game_id'], right_on=['receiver_player_id', 'game_id'])
merged_df['player_id'] = np.where(merged_df['rusher_player_id'].isnull() == True, merged_df['receiver_player_id'], merged_df['rusher_player_id'])

merged_df['plays'] = merged_df['Rushes'].fillna(0) + merged_df['Passes'].fillna(0)
merged_df['plays_'+since_week] = merged_df['Rushes_'+since_week].fillna(0) + merged_df['Passes_'+since_week].fillna(0)
merged_df['plays_Redzone'] = merged_df['Rushes_Redzone'].fillna(0) + merged_df['Passes_Redzone'].fillna(0)
merged_df['plays_Redzone_'+since_week] = merged_df['Rushes_Redzone_'+since_week].fillna(0) + merged_df['Passes_Redzone_'+since_week].fillna(0)

merged_df = merged_df[['player_id', 'game_id','plays', 'plays_'+since_week, 'plays_Redzone','plays_Redzone_'+since_week]]

merged_df.head()

Unnamed: 0,player_id,game_id,plays,plays_1,plays_Redzone,plays_Redzone_1
0,00-0019596,2020_01_TB_NO,3.0,3.0,1.0,1.0
1,00-0019596,2020_02_CAR_TB,1.0,1.0,0.0,0.0
2,00-0019596,2020_03_TB_DEN,5.0,5.0,0.0,0.0
3,00-0019596,2020_04_LAC_TB,3.0,3.0,0.0,0.0
4,00-0019596,2020_05_TB_CHI,3.0,3.0,1.0,1.0


In [36]:
plays_df = pd.merge(merged_df, merged_df.groupby('game_id').sum(), on='game_id').groupby('player_id').sum()
plays_df = plays_df.rename(columns={'plays_x': 'player_plays', 'plays_y': 'team_plays', 
                                        'plays_'+since_week+'_x': 'player_plays_'+since_week, 'plays_'+since_week+'_y': 'team_plays_'+since_week,
                                        'plays_Redzone_x': 'player_plays_Redzone', 'plays_Redzone_y': 'team_plays_Redzone',
                                        'plays_Redzone_'+since_week+'_x': 'player_plays_Redzone_'+since_week, 'plays_Redzone_'+since_week+'_y': 'team_plays_Redzone_'+since_week
                                       })
plays_df.head()

Unnamed: 0_level_0,player_plays,player_plays_1,player_plays_Redzone,player_plays_Redzone_1,team_plays,team_plays_1,team_plays_Redzone,team_plays_Redzone_1
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
00-0019596,43.0,43.0,7.0,7.0,1967.0,1967.0,232.0,232.0
00-0020531,23.0,23.0,4.0,4.0,1225.0,1225.0,168.0,168.0
00-0022127,17.0,17.0,4.0,4.0,1227.0,1227.0,164.0,164.0
00-0022787,3.0,3.0,0.0,0.0,120.0,120.0,17.0,17.0
00-0022921,72.0,72.0,5.0,5.0,1652.0,1652.0,184.0,184.0


In [37]:
plays_df = pd.merge(plays_df, player_df, on = ['player_id'])
plays_df = plays_df[['player_name','posteam','player_plays','team_plays', 'player_plays_'+since_week, 'team_plays_'+since_week,
                        'player_plays_Redzone', 'team_plays_Redzone', 'player_plays_Redzone_'+since_week, 'team_plays_Redzone_'+since_week]]

plays_df.head()

Unnamed: 0_level_0,player_name,posteam,player_plays,team_plays,player_plays_1,team_plays_1,player_plays_Redzone,team_plays_Redzone,player_plays_Redzone_1,team_plays_Redzone_1
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
00-0019596,T.Brady,TB,43.0,1967.0,43.0,1967.0,7.0,232.0,7.0,232.0
00-0020531,D.Brees,NO,23.0,1225.0,23.0,1225.0,4.0,168.0,4.0,168.0
00-0022127,J.Witten,LV,17.0,1227.0,17.0,1227.0,4.0,164.0,4.0,164.0
00-0022787,M.Schaub,ATL,3.0,120.0,3.0,120.0,0.0,17.0,0.0,17.0
00-0022921,L.Fitzgerald,ARI,72.0,1652.0,72.0,1652.0,5.0,184.0,5.0,184.0


In [38]:
plays_df['plays_pct'] = plays_df['player_plays']/plays_df['team_plays']
plays_df['plays_pct_'+since_week] = plays_df['player_plays_'+since_week]/plays_df['team_plays_'+since_week]
plays_df['plays_pct_Redzone'] = plays_df['player_plays_Redzone']/plays_df['team_plays_Redzone']
plays_df['plays_pct_Redzone_'+since_week] = plays_df['player_plays_Redzone_'+since_week]/plays_df['team_plays_Redzone_'+since_week]
plays_df.sort_values(by='player_plays', ascending=False).to_csv('plays.csv')