In [115]:
pip install scipy

Collecting scipy
  Downloading scipy-1.16.2-cp313-cp313-macosx_14_0_arm64.whl.metadata (62 kB)
Downloading scipy-1.16.2-cp313-cp313-macosx_14_0_arm64.whl (20.9 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.9/20.9 MB[0m [31m13.0 MB/s[0m  [33m0:00:01[0m3.4 MB/s[0m eta [36m0:00:01[0m
[?25hInstalling collected packages: scipy
Successfully installed scipy-1.16.2
Note: you may need to restart the kernel to use updated packages.


In [116]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import pandas as pd
import numpy as np
import math
from matplotlib.patches import Rectangle, Circle
from textwrap import wrap
from scipy.spatial.distance import pdist, squareform
from scipy.spatial import ConvexHull


"""
master_input.parquet
master_output.parquet
play_level.parquet
player_analysis.parquet
spatial_features.parquet
supplementary.parquet
trajectories.parquet
"""
# Directory the parquet files are read from; a relative path, so it is resolved
# against the notebook's working directory. Note the trailing slash.
base_dir = "../consolidated_data/"

# Sample Plot
# if frame_id == 10:
#     plt.scatter(x, y, marker='o')
#     plt.xlim(0, 120)
#     plt.ylim(0, 53.3)

We want to measure the movement, frame by frame, of each offensive player relative to every other offensive player, with respect to how they are spacing the field. Eventually we'll have to compare each player against every other player, for every frame of every play. We'll focus on one play first and then expand from there.

What are we trying to observe?
- We are trying to observe how each "Other" receiver is affecting the route of the "Target" receiver.
- Are they creating space?
- Are they creating confusion?
- Are they crowding each other?
- QB options - are multiple receivers open?
- Time to throw

Theory: Receivers should be trying to create separation. If they are in a "bunch" formation, what is the goal?
Picks and rubs (conceptually the same) are only legal within 1 yard. Don't focus on this aspect first, but the framework needs to be able to pick up on these types of dynamics.

What level of detail do we need?
- We would need a view with the target receiver against every other receiver (spread index - how spaced out the receivers are on each frame: 0 for full bunch, 1 for full spread)
- We would need each receiver against every close DB
- We need coordinate details, spatial information
- We need speed, orientation, acceleration for each player
- nfl_id, play_id, game_id level detail

Columns: player_level_keys + movement_details

In [7]:
# Column groups, built up from the narrowest key to the full column lists.
play_keys = ['game_id', 'play_id']          # one play
player_keys = play_keys + ['nfl_id']        # one player within a play
player_dtls = ['player_name', 'player_position', 'player_side', 'player_role']
player_dtls_ext = player_dtls + ['player_to_predict', 'player_height', 'player_weight', 'player_birth_date', 'num_frames_output']
player_mvmt = ['frame_id', 'x', 'y']
# presumably s/a/dir/o are speed, acceleration, direction and orientation - TODO confirm against the data dictionary
player_mvmt_ext = player_mvmt + ['play_direction', 's', 'a', 'dir', 'o']
# NOTE(review): input_cols / output_cols are not referenced in the cells visible here.
input_cols = player_keys + player_dtls_ext + player_mvmt_ext + ['absolute_yardline_number', 'ball_land_x', 'ball_land_y', 'week']
output_cols = player_keys + player_mvmt

# field extents (same units as the x / y columns; presumably yards - TODO confirm)
xmin, ymin = 0, 0
xmax, ymax = 120, 53.3

# base_dir may already end with '/'; strip it so the joined paths do not
# contain a doubled separator ('..//master_input.parquet').
data_dir = base_dir.rstrip('/')

input_df = pd.read_parquet(f'{data_dir}/master_input.parquet')
supp_df = pd.read_parquet(f'{data_dir}/supplementary.parquet')
output_df = pd.read_parquet(f'{data_dir}/master_output.parquet')

In [163]:
# Work on a single play first; the approach is meant to be widened later.
frame_keys = [*player_keys, 'frame_id']
game_id, play_id = 2023090700, 1679

# Rows of the chosen play: both the game and the play id have to match.
in_game = input_df['game_id'] == game_id
in_play = input_df['play_id'] == play_id
play_df = input_df.loc[in_game & in_play]

# Split the play by side, keeping keys, player details and movement columns.
side_cols = player_keys + player_dtls + player_mvmt_ext
offense = play_df.loc[play_df['player_side'] == 'Offense', side_cols]
defense = play_df.loc[play_df['player_side'] == 'Defense', side_cols]

In [262]:
# Collect, per offensive player (keyed by nfl_id), the (x, y) position at every
# input frame from 1 up to the largest frame_id seen for the offense.
grouped = offense.groupby('nfl_id')
num_input_frames = max(offense['frame_id'])
player_positions = {}

for key, gdf in grouped:
    for i in range(1, num_input_frames + 1):
        # First row recorded for this player at frame i; .iloc[0] raises
        # IndexError when the player has no row for that frame.
        row = gdf[gdf['frame_id'].eq(i)]
        x, y = row['x'].iloc[0], row['y'].iloc[0]
        player_positions.setdefault(key, []).append((x, y))

# Show which players are covered.
offense.loc[:, ['nfl_id', 'player_name']].drop_duplicates()

Unnamed: 0,nfl_id,player_name
7769,41325,Jerick McKinnon
7800,46213,Justin Watson
7831,53591,Noah Gray
7862,54519,Skyy Moore
7893,44822,Patrick Mahomes
7924,46243,Marquez Valdes-Scantling


In [267]:
# Per-player "stretch" contribution: for every frame after the first, how much
# the convex hull of the PREVIOUS frame's positions grows when the player's
# CURRENT position is added to it.
#
# For 2-D input, scipy's ConvexHull.volume is the enclosed area
# (ConvexHull.area would be the perimeter).
num_players = len(player_positions)
player_contrib = {}       # nfl_id -> list of per-frame area deltas
play_player_contrib = {}  # nfl_id -> [sum of that player's deltas for this play]
points = {}               # frame index -> list of (x, y), in player_positions order

for i in range(num_input_frames):
    points[i] = [player_positions[player_id][i] for player_id in player_positions]
    # The first frame has no predecessor to compare against.
    if i == 0 or not points[i]:
        continue

    # The previous frame's hull is the same for every player, so build it once
    # per frame instead of once per player.
    prev = points[i - 1]
    prev_area = ConvexHull(prev).volume

    for player_id in player_positions:
        new_point = player_positions[player_id][i]
        grown_area = ConvexHull(prev + [new_point]).volume
        player_contrib.setdefault(player_id, []).append(grown_area - prev_area)

# Collapse each player's per-frame deltas into one total for the play.
for player, deltas in player_contrib.items():
    play_player_contrib.setdefault(player, []).append(sum(deltas))

player_contrib

{41325: [0.0,
  0.0,
  0.0,
  0.06755000000002553,
  0.044399999999996,
  0.29945000000002864,
  0.4487999999999772,
  0.6887999999999721,
  0.8554500000000047,
  1.2378000000000213,
  1.4628999999999621,
  1.749550000000042,
  1.9411499999999933,
  2.026299999999992,
  2.01034999999996,
  2.2055999999999756,
  2.353100000000012,
  2.358450000000005,
  2.512000000000029,
  2.6958499999999503,
  2.9113000000000113,
  3.124050000000011,
  3.56454999999994,
  3.904399999999896,
  4.176649999999995,
  4.501099999999951,
  4.787449999999922,
  4.954350000000034,
  5.124599999999987,
  5.258950000000141],
 44822: [0.0,
  0.09099999999997266,
  0.003650000000000375,
  0.24349999999997607,
  0.4098500000000129,
  0.7579000000000846,
  1.1064499999999384,
  1.5465000000000089,
  1.7121500000000083,
  2.0873000000000417,
  2.5200499999999835,
  2.8250500000000756,
  3.0744999999999862,
  3.371600000000001,
  3.656449999999978,
  3.890999999999906,
  3.94795000000002,
  4.1067499999998915,
  3.95

In [241]:
# Notes on the spread metric:
# - How much the offense spreads out can be normalized over the total area of
#   the field (divide the hull area by the field area) to get a score.
# - Divide by the number of receivers? A single fast runner going over the top
#   ("taking the top off the defense") - how does that affect the stress factor?
# - If they spread the field more, what does that mean?
# - We can calculate "stress": how much does the defense have to adjust to the spread?
# - Is there any correlation between this spread and the success of a pass?

off_charea, def_charea = [], []
off_positions, def_positions = {}, {}

# Only route runners and the targeted receiver feed the offensive hull;
# 'Passer' was commented out of this role list in the original cell.
route_roles = ['Other Route Runner', 'Targeted Receiver']
offense_df = offense.loc[offense['player_role'].isin(route_roles)]
defense_df = defense

# Offense: keep each frame's positions (reused by later cells) and record the
# hull area. For 2-D points ConvexHull.volume is the enclosed area.
for frame_id, frame_df in offense_df.groupby('frame_id'):
    positions = list(zip(frame_df['x'], frame_df['y']))
    off_positions[frame_id] = positions
    hull = ConvexHull(positions)
    area = hull.volume
    off_charea.append(dict(frame_id=frame_id, off_convex_hull_area=area))

# Defense: same measurement; positions are not stored (def_positions stays empty).
for frame_id, frame_df in defense_df.groupby('frame_id'):
    positions = list(zip(frame_df['x'], frame_df['y']))
    hull = ConvexHull(positions)
    def_charea.append(dict(frame_id=frame_id, def_convex_hull_area=hull.volume))

# One row per frame with both areas side by side.
pd.DataFrame(off_charea).merge(pd.DataFrame(def_charea), on=['frame_id'])

Unnamed: 0,frame_id,off_convex_hull_area,def_convex_hull_area
0,1,46.62655,107.49805
1,2,46.81085,107.18355
2,3,46.99515,106.94805
3,4,47.51285,106.4778
4,5,48.8767,105.91095
5,6,51.0225,105.6614
6,7,54.5572,105.6533
7,8,59.0065,105.6383
8,9,64.37225,105.89915
9,10,70.67405,106.1089


In [224]:
# Same structure as off_positions (frame_id -> list of (x, y) pairs), with every
# coordinate converted to a built-in float.
float_positions = {
    frame: [(float(px), float(py)) for px, py in pts]
    for frame, pts in off_positions.items()
}

In [217]:
# contrib[frame] holds one value per position in that frame: how much the convex
# hull of the previous frame's positions grows when that single position is
# added to it. For 2-D points ConvexHull.volume is the enclosed area.
contrib = {}
for k, frame_points in float_positions.items():
    if k == 1:
        # Frame 1 is treated as the baseline: every contribution is 0.
        c = [0] * len(frame_points)
    else:
        prev_points = float_positions[k - 1]
        # The previous frame's hull does not depend on the point being tested,
        # so compute its area once per frame rather than once per point.
        prev_area = ConvexHull(prev_points).volume
        c = [ConvexHull(prev_points + [p]).volume - prev_area for p in frame_points]
    contrib[k] = c

In [222]:
# Sum the per-frame contributions position by position, giving one total per
# slot in the per-frame lists.
# NOTE(review): this assumes slot i refers to the same player in every frame,
# i.e. that rows keep the same player order within each frame - confirm.
#
# The accumulator is sized from `contrib` itself. It must not depend on a loop
# variable left over from an earlier run of this cell, which is not defined
# (or is the wrong type) on a fresh Restart & Run All.
num_slots = max((len(frame_contrib) for frame_contrib in contrib.values()), default=0)
contrib_score = [0] * num_slots

for frame_contrib in contrib.values():
    for i, v in enumerate(frame_contrib):
        contrib_score[i] = contrib_score[i] + v

contrib_score
        
        

[61.31200000000029,
 120.82149999999997,
 28.10195000000016,
 50.46185000000013,
 198.28204999999977]

Stretch contributions per play - how much is this player stretching the field?