<font color=teal>
_______________________________________
</font>


### <font color=teal>Goal:</font>

- Merge play actions and offense/defense power scores into a play by play dataset focused on play-calling

### <font color=teal>Input:</font>

- nfl_play_actions.parquet
- nfl_weekly_defense_ml.parquet
- nfl_weekly_offense_ml.parquet


### <font color=teal>Steps:</font>
- merge the offense and defense power scores into each play, once for the team on offense (`posteam`) and once for the team on defense (`defteam`)
- save the final play-calling dataset


### <font color=teal>Code:</font>
- /src module



### <font color=teal>Output:</font>

- nfl_pbp_play_calls.parquet



<font color=teal>
_______________________________________
</font>



### imports

In [40]:
import os
import sys

# Put the ../src folder (resolved against the current working directory) on the
# module search path.
# NOTE(review): later cells import the package as `src` (`from src.utils import ...`),
# which is resolved from the directory that *contains* `src` -- confirm this entry is
# what actually makes those imports work.
sys.path.append(os.path.abspath("../src"))

In [41]:

from matplotlib import pyplot as plt
import seaborn as sns
import warnings
from src.utils import assert_and_alert
import numpy as np
import pandas as pd

# Suppress all Python warnings from here on.
warnings.filterwarnings('ignore')


In [42]:
# Star import of the project package. The cells below use `configs`, `logging` and
# `get_config` without importing them explicitly, so they presumably come from here.
# NOTE(review): consider importing those names explicitly so their origin is visible.
from  src import *



### housekeeping

In [43]:
# Suppress warnings (the imports cell already installs this same filter).
warnings.filterwarnings('ignore')

# Logger for this notebook, obtained from the project's logging configuration helper,
# with its level set to INFO.
logger = configs.configure_logging("pbp_logger")
logger.setLevel(logging.INFO)

### set flags

In [44]:
# Notebook-wide flags.
DEBUG = False
SCHEMA = 'controls'

# Folder that every parquet file in this notebook is read from or written to.
data_directory = get_config('data_directory')

# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and later
# releases removed the old names, so use whichever spelling this installation provides.
# If neither is available the default style is left in place rather than failing.
for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break


#### load play_actions
We'll have an X, and y set plus the original stats_df datasets, which we'll use going forward

In [45]:
%%time
# Read nfl_play_actions.parquet from the configured data directory and preview it.
full_path = os.path.join(data_directory, "nfl_play_actions.parquet")
pbp_actions_df = pd.read_parquet(full_path)
pbp_actions_df.head()


CPU times: user 108 ms, sys: 74.5 ms, total: 182 ms
Wall time: 122 ms


Unnamed: 0,season,game_id,week,drive,down,drive_id,posteam,defteam,action,posteam_score,...,rush_attempt,kickoff_attempt,punt_attempt,field_goal_attempt,two_point_attempt,extra_point_attempt,timeout,penalty,qb_spike,desc
0,2016,2016_01_MIN_TEN,1,0.0,0.0,2016_01_MIN_TEN_0,MIN,TEN,extra_point,21.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,"3-B.Walsh extra point is GOOD, Center-47-K.McD..."
1,2016,2016_01_MIN_TEN,1,0.0,0.0,2016_01_MIN_TEN_0,MIN,TEN,extra_point,12.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,"3-B.Walsh extra point is No Good, Wide Right, ..."
2,2016,2016_01_CLE_PHI,1,1.0,0.0,2016_01_CLE_PHI_1,PHI,CLE,extra_point,6.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,"6-C.Sturgis extra point is GOOD, Center-46-J.D..."
3,2016,2016_01_DET_IND,1,1.0,1.0,2016_01_DET_IND_1,IND,DET,pass,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,(15:00) (Run formation) 12-A.Luck pass short m...
4,2016,2016_01_NYG_DAL,1,1.0,1.0,2016_01_NYG_DAL_1,DAL,NYG,pass,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,(9:21) (Shotgun) 4-D.Prescott pass short right...


#### load offense stats

In [46]:
%%time
full_path = os.path.join(data_directory, "nfl_weekly_offense_ml.parquet")
offense_powers_df = pd.read_parquet(full_path)
offense_powers_df = offense_powers_df[['season', 'week', 'team', 'offense_power']]
offense_powers_df.head()

CPU times: user 6.62 ms, sys: 2.99 ms, total: 9.61 ms
Wall time: 5.46 ms


Unnamed: 0_level_0,season,week,team,offense_power
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2016,1,ARI,32.768561
1,2016,2,ARI,36.522021
2,2016,3,ARI,35.382326
3,2016,4,ARI,36.997307
4,2016,5,ARI,30.315118


#### load defense stats

In [47]:
%%time
full_path = os.path.join(data_directory, "nfl_weekly_defense_ml.parquet")
defense_powers_df = pd.read_parquet(full_path)
defense_powers_df = defense_powers_df[['season', 'week', 'team', 'defense_power']]
defense_powers_df.head()

CPU times: user 4.56 ms, sys: 1.9 ms, total: 6.46 ms
Wall time: 4.19 ms


Unnamed: 0_level_0,season,week,team,defense_power
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2016,1,ARI,5.655132
1,2016,2,ARI,5.097479
2,2016,3,ARI,7.627174
3,2016,4,ARI,5.55258
4,2016,5,ARI,7.056463


#### merge power stats into play action

In [48]:
def drop_extras(df: pd.DataFrame) -> None:
    """Remove merge leftovers from ``df`` in place.

    Drops the ``team`` column (when present) together with every column whose
    name ends in ``_x`` or ``_y``, the suffixes pandas appends to overlapping
    column names during a merge. A frame without any such column is left
    untouched instead of raising ``KeyError``.

    Note that a genuine column whose name happens to end in ``_x``/``_y`` is
    dropped as well.

    Args:
        df: frame to clean; it is modified in place.

    Returns:
        None.
    """
    drops = [
        col for col in df.columns
        if col == 'team' or str(col).endswith(('_x', '_y'))
    ]
    if drops:
        df.drop(columns=drops, inplace=True)


##### merge into play actions: the offense power and defense power of the team in possession (offense_op, offense_dp)

In [49]:

# Attach the weekly power scores of the team in possession (posteam) to every play:
#   offense_power -> offense_op, defense_power -> offense_dp
#
# Each power table is de-duplicated *before* the join and the join is validated as
# many-to-one, so a repeated (season, week, team) row cannot multiply plays. Calling
# drop_duplicates() on the merged result instead would also collapse play rows that
# are genuinely identical.
posteam_keys = ['season', 'week', 'posteam']
power_keys = ['season', 'week', 'team']

df = pd.merge(
    pbp_actions_df,
    offense_powers_df.drop_duplicates(),
    left_on=posteam_keys,
    right_on=power_keys,
    validate='many_to_one',
)
drop_extras(df)  # removes the now-redundant 'team' key column in place
df = pd.merge(
    df,
    defense_powers_df.drop_duplicates(),
    left_on=posteam_keys,
    right_on=power_keys,
    validate='many_to_one',
)
drop_extras(df)
df = df.rename(columns={'offense_power': 'offense_op', 'defense_power': 'offense_dp'})

# These are inner joins, so a play with no matching power row would be dropped;
# the row-count check below is what detects that.
print(f"merge shapes {pbp_actions_df.shape} + {offense_powers_df.shape} ==> {df.shape}")
assert_and_alert(pbp_actions_df.shape[0]==df.shape[0], msg=f"merge of actions to offense power changed the row count {pbp_actions_df.shape} + {offense_powers_df.shape} ==> {df.shape}")


merge shapes (209872, 36) + (3812, 4) ==> (209872, 38)


True

##### merge into play actions: the offense power and defense power of the team on defense (defense_op, defense_dp)

In [50]:
# Attach the weekly power scores of the defending team (defteam) to every play:
#   offense_power -> defense_op, defense_power -> defense_dp
defteam_keys = ['season', 'week', 'defteam']
power_keys = ['season', 'week', 'team']

# Start from the frame produced by the posteam merge, minus any defense_* columns left
# behind by an earlier run of this cell. Without this, re-running the cell would rename
# a second pair of columns to the same labels and leave duplicate column names.
#
# Each power table is de-duplicated before the join and the join is validated as
# many-to-one, so a repeated (season, week, team) row cannot multiply plays.
df = pd.merge(
    df.drop(columns=['defense_op', 'defense_dp'], errors='ignore'),
    offense_powers_df.drop_duplicates(),
    left_on=defteam_keys,
    right_on=power_keys,
    validate='many_to_one',
)
drop_extras(df)  # removes the now-redundant 'team' key column in place
df = pd.merge(
    df,
    defense_powers_df.drop_duplicates(),
    left_on=defteam_keys,
    right_on=power_keys,
    validate='many_to_one',
)
drop_extras(df)
df = df.rename(columns={'offense_power': 'defense_op', 'defense_power': 'defense_dp'})

# These are inner joins, so a play with no matching power row would be dropped;
# the row-count check below is what detects that.
print(f"merge shapes {pbp_actions_df.shape} + {offense_powers_df.shape} ==> {df.shape}")
assert_and_alert(pbp_actions_df.shape[0]==df.shape[0], msg=f"merge of actions to offense power changed the row count {pbp_actions_df.shape} + {offense_powers_df.shape} ==> {df.shape}")



merge shapes (209872, 36) + (3812, 4) ==> (209872, 40)


True

In [51]:
# Preview the first plays: identifying columns plus the four merged power scores.
df.head().loc[:, [
    'season', 'week', 'game_id', 'drive', 'posteam', 'defteam', 'action',
    'offense_op', 'offense_dp', 'defense_op', 'defense_dp',
]]

Unnamed: 0,season,week,game_id,drive,posteam,defteam,action,offense_op,offense_dp,defense_op,defense_dp
0,2016,1,2016_01_MIN_TEN,0.0,MIN,TEN,extra_point,29.927266,4.097988,29.507986,5.102293
1,2016,1,2016_01_MIN_TEN,0.0,MIN,TEN,extra_point,29.927266,4.097988,29.507986,5.102293
2,2016,1,2016_01_MIN_TEN,2.0,MIN,TEN,rush,29.927266,4.097988,29.507986,5.102293
3,2016,1,2016_01_MIN_TEN,2.0,MIN,TEN,pass,29.927266,4.097988,29.507986,5.102293
4,2016,1,2016_01_MIN_TEN,4.0,MIN,TEN,rush,29.927266,4.097988,29.507986,5.102293


In [36]:
# Spot check on a single known drive (2016, week 1, BAL, drive 7): it is expected
# to hold 5 plays totalling 80 yards and 7 points after the merges.
print("validate expected results from one drive")
test_df = (
    df.loc[lambda plays: (plays.season == 2016)
                         & (plays.week == 1)
                         & (plays.drive == 7)
                         & (plays.posteam == 'BAL')]
    .sort_values(by='play_counter')
)
assert test_df.points_gained.sum() == 7
assert len(test_df) == 5
assert test_df.yards_gained.sum() == 80

# Keep the merged shape around and show it next to the drive's rows.
merged_shape = df.shape
print(merged_shape)
test_df.loc[:, [
    'season', 'week', 'game_id', 'drive', 'posteam', 'defteam', 'action',
    'offense_op', 'offense_dp', 'defense_op', 'defense_dp',
]]

validate expected results from one drive
(209872, 40)


Unnamed: 0,season,week,game_id,drive,posteam,defteam,action,offense_op,offense_dp,defense_op,defense_dp
511,2016,1,2016_01_BUF_BAL,7.0,BAL,BUF,pass,28.390376,5.089803,19.877457,5.827629
512,2016,1,2016_01_BUF_BAL,7.0,BAL,BUF,rush,28.390376,5.089803,19.877457,5.827629
513,2016,1,2016_01_BUF_BAL,7.0,BAL,BUF,pass,28.390376,5.089803,19.877457,5.827629
514,2016,1,2016_01_BUF_BAL,7.0,BAL,BUF,pass,28.390376,5.089803,19.877457,5.827629
510,2016,1,2016_01_BUF_BAL,7.0,BAL,BUF,extra_point,28.390376,5.089803,19.877457,5.827629


#### save features dataset

In [37]:
%%time
# Write the merged play-calling frame to nfl_pbp_play_calls.parquet in the data directory.
full_path = os.path.join(data_directory, "nfl_pbp_play_calls.parquet")
df.to_parquet(full_path)

CPU times: user 204 ms, sys: 29.9 ms, total: 234 ms
Wall time: 224 ms


---