<font color=teal>
_______________________________________
</font>


### <font color=teal>Goal:</font>

- Merge play actions and offense/defense power scores into a play by play dataset focused on play-calling

### <font color=teal>Input:</font>

- nfl_play_actions.parquet
- nfl_weekly_defense_ml.parquet
- nfl_weekly_offense_ml.parquet


### <font color=teal>Steps:</font>
- merge the offense and defense power scores into each play, based on which team is on offense and which is on defense
- aggregate the plays into per-team game statistics
- save the final play-calling dataset


### <font color=teal>Output:</font>

- store_df(df, "nfl_pbp_play_calls", db=db if COMMIT_TO_DATABASE else None, schema=SCHEMA)
- store_df(games_df, "nfl_pbp_game_stats", db=db if COMMIT_TO_DATABASE else None, schema=SCHEMA)



<font color=teal>
_______________________________________
</font>



### <font color=teal>imports</font>

In [16]:
import os
import sys

# Put ../src (resolved against the current working directory) on the module search path.
# NOTE(review): a later cell runs `from src import *`; confirm that the `src` package
# actually resolves through this entry rather than through the kernel's working directory.
sys.path.append(os.path.abspath("../src"))

In [None]:

from matplotlib import pyplot as plt
import warnings
# NOTE(review): the wildcard import is presumably what supplies pd, logging, configs,
# get_config and database_loader to the cells below (none of them is imported by name
# before first use) — confirm, then replace it with explicit imports.
from src import *

# Silences every Python warning for the rest of the session, so deprecation and
# SettingWithCopy notices from later cells are not shown.
warnings.filterwarnings('ignore')


### <font color=teal>housekeeping</font>

In [None]:
# Same blanket warning filter that the imports cell already installs.
warnings.filterwarnings('ignore')

# Logger obtained from the project's configure_logging helper, then set to INFO level.
logger = configs.configure_logging("pbp_logger")
logger.setLevel(logging.INFO)

### <font color=teal>settings</font>

In [20]:
# Database handle built from the configured connection string.
db = database_loader.DatabaseLoader(get_config('connection_string'))
DEBUG=False
# When False, the save cell passes db=None to store_df instead of this handle.
COMMIT_TO_DATABASE=True
# Schema name handed to store_df.
SCHEMA='controls'

# Directory the input parquet files are read from.
data_directory = get_config('data_directory')

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and 3.8 removed
# the old names, so prefer the new name when it is installed. If neither is listed, fall
# back to the original name so the failure surfaces exactly as it did before.
plt.style.use(next(
    (style for style in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid') if style in plt.style.available),
    'seaborn-darkgrid',
))


#### <font color=teal>load play_actions</font>

In [21]:
#time

# Play-by-play action rows, read from the configured data directory.
full_path = os.path.join(data_directory, "nfl_play_actions.parquet")
pbp_actions_df = pd.read_parquet(full_path)
# Preview only; the full frame is kept in pbp_actions_df.
pbp_actions_df.head()


CPU times: user 150 ms, sys: 55.2 ms, total: 205 ms
Wall time: 130 ms


Unnamed: 0,season,game_id,week,drive,down,drive_id,home_team,away_team,posteam,defteam,...,rush_attempt,kickoff_attempt,punt_attempt,field_goal_attempt,two_point_attempt,extra_point_attempt,timeout,penalty,qb_spike,desc
0,2016,2016_01_MIN_TEN,1,0.0,0.0,2016_01_MIN_TEN_0,TEN,MIN,MIN,TEN,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,"3-B.Walsh extra point is GOOD, Center-47-K.McD..."
1,2016,2016_01_MIN_TEN,1,0.0,0.0,2016_01_MIN_TEN_0,TEN,MIN,MIN,TEN,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,"3-B.Walsh extra point is No Good, Wide Right, ..."
2,2016,2016_01_CLE_PHI,1,1.0,0.0,2016_01_CLE_PHI_1,PHI,CLE,PHI,CLE,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,"6-C.Sturgis extra point is GOOD, Center-46-J.D..."
3,2016,2016_01_NYG_DAL,1,1.0,1.0,2016_01_NYG_DAL_1,DAL,NYG,DAL,NYG,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,(9:21) (Shotgun) 4-D.Prescott pass short right...
4,2016,2016_01_CAR_DEN,1,1.0,1.0,2016_01_CAR_DEN_1,DEN,CAR,DEN,CAR,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,(13:42) 13-T.Siemian pass short right to 10-E....


#### load offense stats

In [22]:
#time
# Weekly offense ratings: read the file and keep only the merge keys plus the score.
full_path = os.path.join(data_directory, "nfl_weekly_offense_ml.parquet")
offense_powers_df = pd.read_parquet(full_path)[['season', 'week', 'team', 'offense_power']]
offense_powers_df.head()

CPU times: user 7.16 ms, sys: 3.12 ms, total: 10.3 ms
Wall time: 5.3 ms


Unnamed: 0_level_0,season,week,team,offense_power
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2016,1,ARI,27.007937
1,2016,2,ARI,30.113262
2,2016,3,ARI,29.11238
3,2016,4,ARI,29.840328
4,2016,5,ARI,26.235002


#### load defense stats

In [23]:
#time
# Weekly defense ratings: read the file and keep only the merge keys plus the score.
full_path = os.path.join(data_directory, "nfl_weekly_defense_ml.parquet")
defense_powers_df = pd.read_parquet(full_path)[['season', 'week', 'team', 'defense_power']]
defense_powers_df.head()

CPU times: user 4.58 ms, sys: 2.73 ms, total: 7.31 ms
Wall time: 4.6 ms


Unnamed: 0_level_0,season,week,team,defense_power
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2016,1,ARI,6.173816
1,2016,2,ARI,5.540982
2,2016,3,ARI,8.111671
3,2016,4,ARI,5.962334
4,2016,5,ARI,7.655234


##### merge into play actions: team in possession's offense power and defense power (offense_op, offense_dp)

In [24]:
from src.utils import assert_and_alert
from src.inline_validation import perform_inline_play_action_tests


def drop_extras(df: pd.DataFrame):
    """Remove merge leftovers from ``df`` in place.

    Drops the ``team`` column together with every column whose name ends in
    ``_x`` or ``_y``. The frame is modified in place and nothing is returned.
    A ``KeyError`` is raised by pandas when ``df`` has no ``team`` column.
    """
    suffixed = [name for name in df.columns.values if str(name).endswith(("_y", "_x"))]
    df.drop(columns=['team'] + suffixed, inplace=True)


def merge_powers(action_df, powers_df, left_on, renames=None, msg='play_counter'):
    """Attach one weekly power score to every play row.

    ``action_df`` is joined to ``powers_df`` with ``pd.merge`` (default inner join),
    matching ``left_on`` against the power frame's ``season``/``week``/``team`` columns.
    Exact duplicate rows are removed, the merge leftovers are dropped by ``drop_extras``,
    and the power column is optionally renamed.

    Parameters
    ----------
    action_df : play-level frame; it is not modified.
    powers_df : frame with ``season``, ``week``, ``team`` and a power column.
    left_on : the three ``action_df`` columns to match against season/week/team.
    renames : optional mapping passed to ``DataFrame.rename(columns=...)``; ``None``
        (the default) or an empty mapping leaves the column names untouched.
    msg : label passed to the inline validation and included in the row-count alert.

    Returns
    -------
    The merged frame. ``assert_and_alert`` is called with a false condition when the
    merged row count differs from ``action_df``'s.
    """
    expected_rows = action_df.shape[0]
    _df = pd.merge(action_df, powers_df, left_on=left_on, right_on=['season', 'week', 'team']).drop_duplicates()
    drop_extras(_df)
    # DataFrame.rename(columns=None) raises TypeError, so only rename when a mapping is given.
    if renames:
        _df.rename(columns=renames, inplace=True)
    perform_inline_play_action_tests(_df, msg=msg)
    # Report the shapes of the frames actually merged here, not the notebook-level globals.
    assert_and_alert(expected_rows == _df.shape[0],
                     msg=f"merge of actions to powers ({msg}) changed the row count {action_df.shape} + {powers_df.shape} ==> {_df.shape}")
    return _df



In [25]:

# One entry per merge: (power frame, play column naming the team, output column rename, validation label).
merge_plan = (
    (offense_powers_df, 'posteam', {'offense_power': 'offense_op'}, "merging offense_OP"),
    (defense_powers_df, 'posteam', {'defense_power': 'offense_dp'}, "merging offense_DP"),
    (offense_powers_df, 'defteam', {'offense_power': 'defense_op'}, "merging defense_OP"),
    (defense_powers_df, 'defteam', {'defense_power': 'defense_dp'}, "merging defense_DP"),
)

# Start from the raw play actions and fold each power score in, in the order listed above.
df = pbp_actions_df
for powers, team_column, column_renames, label in merge_plan:
    df = merge_powers(df, powers, left_on=['season', 'week', team_column], renames=column_renames, msg=label)



2023-07-18 12:52:12,521 - INFO - Validating game 2016_01_BUF_BAL values at location: merging offense_OP...
2023-07-18 12:52:12,967 - INFO - Validating game 2016_01_BUF_BAL values at location: merging offense_DP...
2023-07-18 12:52:13,403 - INFO - Validating game 2016_01_BUF_BAL values at location: merging defense_OP...
2023-07-18 12:52:13,830 - INFO - Validating game 2016_01_BUF_BAL values at location: merging defense_DP...


In [39]:
import numpy as np
import pandas as pd

# Score margin from the point of view of the team with the ball.
# Note: this adds a 'point_spread' column to the play-level frame `df` itself.
df['point_spread'] = df['posteam_final_score'] - df['defteam_final_score']

# One row per (season, week, game, team with the ball, opponent), aggregating that team's plays.
grouped_df = df.groupby(['season', 'week', 'game_id', 'posteam', 'defteam']).agg(
    # Distinct drive numbers. The previous 'count' tallied play rows, not drives.
    # NOTE(review): the play-actions preview shows drive == 0.0 on extra-point rows; if 0 is a
    # placeholder it is counted as one drive here — confirm.
    drive_count=('drive', 'nunique'),
    # Number of plays run on 1st down (down == 1), not first downs earned.
    first_downs=('down', lambda x: (x == 1).sum()),
    point_spread=('point_spread', 'max'),
    team_final_score=('posteam_final_score', 'max'),
    opposing_team_final_score=('defteam_final_score', 'max'),
    yards_gained=('yards_gained', 'sum'),
    pass_attempts=('pass_attempt', 'sum'),
    rush_attempts=('rush_attempt', 'sum'),
    kickoff_attempt=('kickoff_attempt', 'sum'),
    punt_attempt=('punt_attempt', 'sum'),
    field_goal_attempt=('field_goal_attempt', 'sum'),
    two_point_attempt=('two_point_attempt', 'sum'),
    extra_point_attempt=('extra_point_attempt', 'sum'),
    timeout=('timeout', 'sum'),
    penalty=('penalty', 'sum'),
    qb_spike=('qb_spike', 'sum'),
    team_offense_power=('offense_op', 'mean'),
    team_defense_power=('offense_dp', 'mean'),
    opposing_team_offense_power=('defense_op', 'mean'),
    opposing_team_defense_power=('defense_dp', 'mean')
).reset_index()  # turn the group keys back into ordinary columns

# Column order of the stored game-stats table (before the team/opposing_team rename).
game_columns = ['season', 'week', 'game_id', 'posteam', 'defteam',
                'team_offense_power', 'team_defense_power', 'opposing_team_offense_power', 'opposing_team_defense_power',
                'point_spread', 'drive_count', 'first_downs', 'team_final_score',
                'opposing_team_final_score', 'yards_gained', 'pass_attempts', 'rush_attempts',
                'kickoff_attempt', 'punt_attempt', 'field_goal_attempt', 'two_point_attempt',
                'extra_point_attempt', 'timeout', 'penalty', 'qb_spike']

# Build games_df as a new frame in one chain. The earlier version renamed and assigned columns
# on a slice of grouped_df in place, which is the pattern pandas flags with SettingWithCopyWarning.
games_df = (
    grouped_df[game_columns]
    .rename(columns={'posteam': 'team', 'defteam': 'opposing_team'})
    .assign(
        # 2 = win, 0 = loss, 1 = anything else (a tie or a missing spread)
        loss_tie_win=lambda g: np.select([g['point_spread'] > 0, g['point_spread'] < 0], [2, 0], default=1),
        team_power_sum=lambda g: g['team_offense_power'] + g['team_defense_power'],
        opposing_team_power_sum=lambda g: g['opposing_team_offense_power'] + g['opposing_team_defense_power'],
        # later keywords can read the columns created by earlier keywords in the same assign call
        power_difference=lambda g: g['team_power_sum'] - g['opposing_team_power_sum'],
        point_spread=lambda g: g['point_spread'].astype('float'),
    )
)

games_df[['team_offense_power', 'team_defense_power', 'opposing_team_offense_power', 'opposing_team_defense_power',
          'team_power_sum', 'opposing_team_power_sum', 'power_difference']].head()

Unnamed: 0,team_offense_power,team_defense_power,opposing_team_offense_power,opposing_team_defense_power,team_power_sum,opposing_team_power_sum,power_difference
0,23.428697,5.476946,17.649834,6.35367,28.905643,24.003503,4.90214
1,17.649834,6.35367,23.428697,5.476946,24.003503,28.905643,-4.90214
2,22.940911,5.430129,17.864515,6.617459,28.371039,24.481975,3.889065
3,17.864515,6.617459,22.940911,5.430129,24.481975,28.371039,-3.889065
4,22.804072,8.374514,29.423198,6.710474,31.178586,36.133672,-4.955087


##### merge into play actions: defending team's offense power and defense power (defense_op, defense_dp)

#### save features dataset

In [40]:
#time
from src.db_utils import store_df

# Only the per-game statistics are written; db is passed only when COMMIT_TO_DATABASE is true,
# otherwise store_df receives db=None.
# NOTE(review): the play-level write below is commented out, yet the notebook header lists it as
# an output and the captured log shows nfl_pbp_play_calls being written — confirm whether it
# should be re-enabled.
# store_df(df, "nfl_pbp_play_calls", db=db if COMMIT_TO_DATABASE else None, schema=SCHEMA)
store_df(games_df, "nfl_pbp_game_stats", db=db if COMMIT_TO_DATABASE else None, schema=SCHEMA)

2023-07-18 13:47:37,982 - INFO - writing file nfl_pbp_play_calls  to /Users/christopherlomeli/Source/courses/datascience/Springboard/capstone/NFL/NFLVersReader/data/nfl/nfl_pbp_play_calls.parquet
2023-07-18 13:47:38,328 - INFO - writing table nfl_pbp_play_calls in schema controls
2023-07-18 13:48:05,167 - INFO - writing file nfl_pbp_game_stats  to /Users/christopherlomeli/Source/courses/datascience/Springboard/capstone/NFL/NFLVersReader/data/nfl/nfl_pbp_game_stats.parquet
2023-07-18 13:48:05,182 - INFO - writing table nfl_pbp_game_stats in schema controls


CPU times: user 17.6 s, sys: 1.27 s, total: 18.8 s
Wall time: 27.7 s


---