In [1]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import pandas as pd
import numpy as np

In [2]:
"""
Parquet file names noted for this analysis.
NOTE(review): presumably all of these live under base_dir - confirm.

master_input.parquet
master_output.parquet
play_level.parquet
player_analysis.parquet
spatial_features.parquet
supplementary.parquet
trajectories.parquet
"""
# Directory prefix for the parquet files; note that it already ends with a slash.
base_dir = "../consolidated_data/"

In [118]:
# Identifiers kept as module-level defaults.
# NOTE(review): these are strings, while a later cell reassigns game_id as an
# int (2023100109) - confirm which dtype the key columns actually use.
game_id = '2023090700'
play_id = '1679'
nfl_id = '46243'

# Column groups reused when selecting from / joining the frames below.
play_keys = ['game_id', 'play_id']
player_keys = play_keys + ['nfl_id']
player_dtls = ['player_name', 'player_height', 'player_weight', 'player_birth_date', 'player_position', 'player_side', 'player_role', 'num_frames_output']
player_mvmt = ['frame_id', 'x', 'y']

# 'num_frames_output' is already contributed by player_dtls, so it is not
# repeated in the extra columns: a duplicated label would make
# input_df[input_cols] return the same column twice.
input_cols = player_keys + player_dtls + player_mvmt + ['player_to_predict', 'play_direction', 'absolute_yardline_number', 's', 'a', 'dir', 'o', 'ball_land_x', 'ball_land_y', 'week']
input_df = pd.read_parquet(f'{base_dir}/master_input.parquet')

output_cols = player_keys + player_mvmt
output_df = pd.read_parquet(f'{base_dir}/master_output.parquet')

supp_df = pd.read_parquet(f'{base_dir}/supplementary.parquet')

In [104]:
# Show the column labels available in supp_df.
supp_df.columns

Index(['game_id', 'season', 'week', 'game_date', 'game_time_eastern',
       'home_team_abbr', 'visitor_team_abbr', 'play_id', 'play_description',
       'quarter', 'game_clock', 'down', 'yards_to_go', 'possession_team',
       'defensive_team', 'yardline_side', 'yardline_number',
       'pre_snap_home_score', 'pre_snap_visitor_score',
       'play_nullified_by_penalty', 'pass_result', 'pass_length',
       'offense_formation', 'receiver_alignment', 'route_of_targeted_receiver',
       'play_action', 'dropback_type', 'dropback_distance',
       'pass_location_type', 'defenders_in_the_box', 'team_coverage_man_zone',
       'team_coverage_type', 'penalty_yards', 'pre_penalty_yards_gained',
       'yards_gained', 'expected_points', 'expected_points_added',
       'pre_snap_home_team_win_probability',
       'pre_snap_visitor_team_win_probability',
       'home_team_win_probability_added', 'visitor_team_win_probility_added'],
      dtype='object')

In [52]:
# Left-join supp_df onto input_df by (game_id, play_id); the result is only
# displayed, not stored.
# NOTE(review): the recorded output for this cell is a per-column int64 count
# Series, not a DataFrame repr - it looks stale relative to this expression.
input_df.merge(supp_df, how='left', on=['game_id', 'play_id'])

game_id                                  4880579
play_id                                  4880579
player_to_predict                        4880579
nfl_id                                   4880579
frame_id                                 4880579
                                          ...   
expected_points_added                    4880579
pre_snap_home_team_win_probability       4880579
pre_snap_visitor_team_win_probability    4880579
home_team_win_probability_added          4880579
visitor_team_win_probility_added         4880579
Length: 63, dtype: int64

In [5]:
# Attach the de-duplicated per-player detail columns from input_df to the
# selected output_df columns; the left join keeps every output row.
output_xdf = output_df[output_cols].merge(
    input_df[player_keys + player_dtls].drop_duplicates(),
    how='left',
    on=player_keys,
)

In [67]:
# Text before the first ':' of the first game_clock value.
# NOTE(review): named `hour`, but game_clock values shown in this notebook
# (e.g. '11:17') are later filtered by `minute` on this same field - this is
# presumably the minutes part; confirm.
hour = supp_df['game_clock'].iloc[0].partition(':')[0]

In [146]:
season = 2023
team = 'CIN'

# Rows of supp_df for the chosen season where `team` is the possession team,
# ordered by week.
games_df = supp_df[
    (supp_df['season'] == season) & (supp_df['possession_team'] == team)
].sort_values('week')

# One row per distinct (game_id, week, home, visitor) combination.
games_df[['game_id', 'week', 'home_team_abbr', 'visitor_team_abbr']].drop_duplicates()

Unnamed: 0,game_id,week,home_team_abbr,visitor_team_abbr
140,2023091002,1,CLE,CIN
992,2023091702,2,CIN,BAL
2544,2023092501,3,CIN,LA
3075,2023100109,4,TEN,CIN
3835,2023100807,5,ARI,CIN
4309,2023101503,6,CIN,SEA
6281,2023102912,8,SF,CIN
7029,2023110511,9,CIN,BUF
7288,2023111202,10,CIN,HOU
7867,2023111600,11,BAL,CIN


In [170]:
game_id = 2023100109
quarter = 1
minute = 11
cols = play_keys + ['game_clock', 'pass_result', 'yards_gained']

# Keep rows of games_df for the chosen game and quarter whose game_clock text
# before the ':' equals the zero-padded `minute` (e.g. 11 -> '11').
# The row mask is computed from the already-filtered frame (via the callable)
# instead of from the unfiltered supp_df, so the selection no longer depends on
# games_df and supp_df happening to share index labels.
game_df = (
    games_df
    .query(f"game_id == {game_id} and quarter == {quarter}")
    .loc[
        lambda plays: plays['game_clock'].str.split(':').str[0] == str(minute).zfill(2),
        cols,
    ]
)
game_df

Unnamed: 0,game_id,play_id,game_clock,pass_result,yards_gained
3056,2023100109,201,11:17,C,10


In [168]:
# helper function
import math


def distance(x1, y1, x2, y2):
    """Return the Euclidean distance between points (x1, y1) and (x2, y2).

    Args:
        x1, y1: Coordinates of the first point (scalar numbers).
        x2, y2: Coordinates of the second point (scalar numbers).

    Returns:
        float: sqrt((x2 - x1)**2 + (y2 - y1)**2).
    """
    # math.hypot computes the same quantity without squaring the deltas
    # explicitly, so very large coordinate differences do not raise
    # OverflowError.
    return math.hypot(x2 - x1, y2 - y1)

In [162]:
# NOTE(review): play_df is not defined in any visible cell - it presumably
# comes from a cell that is not shown (or was deleted); confirm before a
# Restart & Run All.
# Last (x, y) row among the rows whose player_role is 'Passer'.
passer = play_df.loc[play_df['player_role'] == 'Passer', ['x', 'y']].iloc[-1]
# ball_land_x / ball_land_y taken from the last row of play_df.
ball = play_df.loc[:, ['ball_land_x', 'ball_land_y']].iloc[-1]

In [169]:
# Distance from the passer's (x, y) to the ball landing point, reading the
# Series entries by label rather than by position.
distance(passer['x'], passer['y'], ball['ball_land_x'], ball['ball_land_y'])

40.972777940102