In [29]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import pandas as pd
import numpy as np
import math

"""
master_input.parquet
master_output.parquet
play_level.parquet
player_analysis.parquet
spatial_features.parquet
supplementary.parquet
trajectories.parquet
"""
base_dir = "../consolidated_data/"

In [73]:
game_id = '2023090700'
play_id = '1679'
nfl_id = '46243'
play_keys = ['game_id', 'play_id']
player_keys = play_keys + ['nfl_id']
player_dtls = ['player_name', 'player_height', 'player_weight', 'player_birth_date', 'player_position', 'player_side', 'player_role', 'num_frames_output']
player_mvmt = ['frame_id', 'x', 'y']

input_cols = player_keys + player_dtls + player_mvmt + ['player_to_predict', 'play_direction', 'absolute_yardline_number', 's', 'a', 'dir', 'o', 'num_frames_output', 'ball_land_x', 'ball_land_y', 'week']
input_df = pd.read_parquet(f'{base_dir}/master_input.parquet')

output_cols = player_keys + player_mvmt
output_df = pd.read_parquet(f'{base_dir}/master_output.parquet')

supp_df = pd.read_parquet(f'{base_dir}/supplementary.parquet')

game_df = pd.merge(input_df, supp_df, on=['game_id', 'play_id'], how='left')
game_df["num_frames_input"] = game_df.groupby(["game_id", "play_id", "nfl_id"])["frame_id"].transform('max')
output_xdf = pd.merge(
    output_df[output_cols], 
    input_df[player_keys + player_dtls].drop_duplicates(), 
    on=player_keys, 
    how='left'
)

output_sxdf = pd.merge(output_xdf, supp_df, on=play_keys, how="left")

In [7]:
# need player based stats
# can start by considering 1 game and then how that player performed
# metric_1: separation from nearest defender
# - at what point?
# - point of ball throw
# - point of ball landing / catch
# - we can determine this by finding those points in time and the separation
# - factors to consider - how many defenders?
# - we can consider the nearest defenders as defined by the prediction in the output
# - we can also consider the route run
# - depth of catch - orthogonal distance from LOS
# - change of direction (curl) vs straight (cross / go)
# - first will find in a single game.

# ab = at ball at final output location
# at = at throw

In [75]:
output_sxdf.columns

Index(['game_id', 'play_id', 'nfl_id', 'frame_id', 'x', 'y', 'player_name',
       'player_height', 'player_weight', 'player_birth_date',
       'player_position', 'player_side', 'player_role', 'num_frames_output',
       'season', 'week', 'game_date', 'game_time_eastern', 'home_team_abbr',
       'visitor_team_abbr', 'play_description', 'quarter', 'game_clock',
       'down', 'yards_to_go', 'possession_team', 'defensive_team',
       'yardline_side', 'yardline_number', 'pre_snap_home_score',
       'pre_snap_visitor_score', 'play_nullified_by_penalty', 'pass_result',
       'pass_length', 'offense_formation', 'receiver_alignment',
       'route_of_targeted_receiver', 'play_action', 'dropback_type',
       'dropback_distance', 'pass_location_type', 'defenders_in_the_box',
       'team_coverage_man_zone', 'team_coverage_type', 'penalty_yards',
       'pre_penalty_yards_gained', 'yards_gained', 'expected_points',
       'expected_points_added', 'pre_snap_home_team_win_probability',
     

In [69]:
single_game_df = game_df.query("game_id == 2023090700")
single_play_df = single_game_df.query("player_to_predict == True and frame_id == num_frames_input")[["game_id", "play_id", "nfl_id", "x", "y", "frame_id", "pass_result", "player_side", "player_role", "num_frames_input", "num_frames_output"]]
off_df = single_play_df.query("player_role == 'Targeted Receiver'")[["game_id", "play_id", "nfl_id", "pass_result", "x", "y"]]
def_df = single_play_df.query("player_role == 'Defensive Coverage'")[["game_id", "play_id", "nfl_id", "pass_result", "x", "y"]]
over_df = pd.merge(off_df, def_df.rename(columns={"x": "def_x", "y": "def_y", "nfl_id": "def_nfl_id"}), on=["game_id", "play_id", "pass_result"], how="left")

In [76]:
osingle_game_df = output_sxdf.query("game_id == 2023090700")
osingle_play_df = single_game_df.query("frame_id == num_frames_output")[["game_id", "play_id", "nfl_id", "x", "y", "frame_id", "pass_result", "player_side", "player_role"]]
ooff_df = osingle_play_df.query("player_role == 'Targeted Receiver'")[["game_id", "play_id", "nfl_id", "pass_result", "x", "y"]]
odef_df = osingle_play_df.query("player_role == 'Defensive Coverage'")[["game_id", "play_id", "nfl_id", "pass_result", "x", "y"]]
oover_df = pd.merge(ooff_df, odef_df.rename(columns={"x": "def_x", "y": "def_y", "nfl_id": "def_nfl_id"}), on=["game_id", "play_id", "pass_result"], how="left")

In [95]:
def distance(Series):
    x, off_x = Series['def_x'], Series['x']
    y, off_y = Series['def_y'], Series['y']
    return math.sqrt((x - off_x) ** 2 + (y - off_y) ** 2)

over_df["off_separation"] = over_df.apply(distance, axis=1)
instats = over_df.groupby(
    ["game_id", "nfl_id", "pass_result"]
)["off_separation"].agg(
    ['min', 'max', 'mean']
).rename(columns={"min": "min_off_sep_at", "max": "max_off_sep_at", "mean": "mean_off_sep_at"})

In [96]:
oover_df["off_separation"] = over_df.apply(distance, axis=1)
outstats = oover_df.groupby(
    ["game_id", "nfl_id", "pass_result"]
)["off_separation"].agg(
    ['min', 'max', 'mean']
).rename(columns={"min": "min_off_sep_ab", "max": "max_off_sep_ab", "mean": "mean_off_sep_ab"})

In [78]:
players = single_game_df[["nfl_id", "player_name"]].sort_values("nfl_id").drop_duplicates()

In [101]:
inout_df = pd.merge(instats, outstats, on=["game_id", "nfl_id", "pass_result"])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,min_off_sep_at,max_off_sep_at,mean_off_sep_at,min_off_sep_ab,max_off_sep_ab,mean_off_sep_ab
game_id,nfl_id,pass_result,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023090700,38696,C,2.180138,18.205606,10.192872,1.387083,7.227565,3.556552
2023090700,38696,I,0.87367,15.735543,7.503947,2.90663,13.917475,8.282822
2023090700,41325,C,1.883215,12.58223,9.18194,1.883215,18.205606,6.596436
2023090700,41325,I,2.33118,2.33118,2.33118,,,
2023090700,42460,C,3.41299,6.384497,5.35447,,,
2023090700,42460,I,1.318939,10.914083,5.820781,,,
2023090700,43584,C,4.709522,13.917475,10.52322,,,
2023090700,44930,C,1.289961,17.677831,8.35549,3.658538,14.331846,8.845561
2023090700,44930,I,4.735652,4.89418,4.814916,4.735652,12.58223,8.894876
2023090700,46213,C,2.078485,13.146992,7.392534,0.514782,27.575409,11.166344
