In [701]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.spatial.distance import cdist
from tqdm.notebook import tqdm
import IPython

Reading in Data

In [749]:
"""Emily's reading in files"""
# Load the raw CSVs from ./data/nfl-big-data-bowl-2024/ under the current working directory.
# `data_dir` keeps its trailing separator so `data_dir + "<file>.csv"` keeps working.
data_dir = os.path.join(os.getcwd(), 'data', 'nfl-big-data-bowl-2024', '')
players = pd.read_csv(os.path.join(data_dir, "players.csv"))
week = pd.read_csv(os.path.join(data_dir, "tracking_week_1.csv"))
plays = pd.read_csv(os.path.join(data_dir, "plays.csv"))
tackles = pd.read_csv(os.path.join(data_dir, "tackles.csv"))


# Join each player's position onto a week's worth of tracking data.
# The key is named explicitly: without `on=`, merge joins on every column name the
# two frames share, so the result would silently change if the column selection did.
# `validate` raises if players.csv ever lists an nflId twice, which would otherwise
# duplicate tracking rows.
week = week.merge(
    players.loc[:, ['nflId', 'position']],
    on='nflId',
    how='left',
    validate='many_to_one',
)
week.shape

(1407439, 18)

In [750]:
# initial filter (disabled debugging aid: restrict to a few games / plays / frames)
#week = week[(week['gameId'].isin([2022090800, 2022091200])) & (week['playId'].isin([56, 3826])) & (week['frameId'].isin([49,50,3,2,1]))]
#week = week[(week['gameId']==2022090800) | (week['gameId'] == 2022091200)]

# Columns that together identify one tracking frame.
frame_keys = ['gameId', 'playId', 'frameId']

# Ball coordinates per frame. A boolean mask selects the same rows that the previous
# groupby(...).apply(lambda ...) did, without calling Python once per frame, and keeps
# the frame keys as ordinary columns so the merge below is a plain column join.
grouped = week.loc[week['club'] == 'football', frame_keys + ['x', 'y']]

# Attach the ball position to every row of the same frame as x_ball / y_ball.
# `validate` raises if a frame has more than one football row, which would otherwise
# silently duplicate every player row of that frame.
test = week.merge(
    grouped,
    on=frame_keys,
    how='left',
    suffixes=('', '_ball'),
    validate='many_to_one',
)

# Straight-line distance between each row's (x, y) and the ball's (x, y) in that frame.
test['distance_to_ball'] = np.sqrt((test["x"] - test["x_ball"]) ** 2 + (test["y"] - test["y_ball"]) ** 2)

# Fix a deterministic row order within each frame: team, then position, then jersey number.
test_sorted = test.sort_values(['gameId', 'playId', 'frameId', 'club', 'position', 'jerseyNumber'])

In [751]:
# Work on a copy so columns added to `pre_final` later do not touch `test_sorted`.
pre_final = test_sorted.copy()

# Preview only the identifying, movement and distance columns.
preview_cols = [
    'gameId', 'playId', 'frameId',
    'position', 'jerseyNumber', 'club',
    'x', 'y', 's', 'a', 'dis', 'o', 'dir',
    'distance_to_ball',
]
pre_final.loc[:, preview_cols]

Unnamed: 0,gameId,playId,frameId,position,jerseyNumber,club,x,y,s,a,dis,o,dir,distance_to_ball
66,2022090800,56,1,C,60.0,BUF,88.25,28.74,0.67,1.97,0.07,244.21,30.29,5.995406
374,2022090800,56,1,C,71.0,BUF,90.22,32.08,2.77,2.46,0.28,103.24,115.97,5.451768
0,2022090800,56,1,G,76.0,BUF,88.37,27.27,1.62,1.15,0.16,231.74,147.90,7.334439
242,2022090800,56,1,QB,17.0,BUF,90.75,30.01,0.29,1.00,0.03,299.06,301.11,6.850546
286,2022090800,56,1,RB,26.0,BUF,88.17,24.17,1.94,0.33,0.19,325.95,238.13,10.132325
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1407014,2022091200,3826,53,ILB,56.0,SEA,56.58,10.06,0.44,3.86,0.07,325.84,318.56,1.131459
1407120,2022091200,3826,53,OLB,52.0,SEA,54.34,10.10,1.51,1.90,0.16,99.54,217.56,1.316738
1407332,2022091200,3826,53,OLB,53.0,SEA,56.65,7.97,0.18,2.09,0.01,34.58,225.96,2.815386
1406802,2022091200,3826,53,SS,26.0,SEA,50.43,14.43,4.88,3.39,0.50,127.24,139.90,6.428001


In [753]:
# `number` is the running position of a row within its (game, play, frame, club) group,
# following the current row order. Football rows are dropped after numbering.
slot_keys = ['gameId', 'playId', 'frameId', 'club']
pre_final = (
    pre_final
    .assign(number=pre_final.groupby(slot_keys).cumcount())
    .loc[lambda rows: rows['club'].ne('football')]
)

In [754]:
# Order rows by frame, then by position and jersey number (club is not a sort key here).
row_order = ['gameId', 'playId', 'frameId', 'position', 'jerseyNumber']
pre_final = pre_final.sort_values(row_order)

In [755]:
# Let pandas render up to 100 rows before truncating a displayed frame.
pd.options.display.max_rows = 100

In [756]:
# Inspect the player rows after numbering, football removal and re-sorting.
pre_final

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,...,a,dis,o,dir,event,position,x_ball,y_ball,distance_to_ball,number
66,2022090800,56,42392.0,Mitch Morse,1,2022-09-08 20:24:05.200000,60.0,BUF,left,88.25,...,1.97,0.07,244.21,30.29,,C,85.050003,33.810001,5.995406,0
374,2022090800,56,48512.0,Ryan Bates,1,2022-09-08 20:24:05.200000,71.0,BUF,left,90.22,...,2.46,0.28,103.24,115.97,,C,85.050003,33.810001,5.451768,1
110,2022090800,56,42816.0,Troy Hill,1,2022-09-08 20:24:05.200000,2.0,LA,left,72.34,...,0.91,0.43,338.33,257.23,,CB,85.050003,33.810001,28.940357,0
132,2022090800,56,43294.0,Jalen Ramsey,1,2022-09-08 20:24:05.200000,5.0,LA,left,77.95,...,3.00,0.54,139.54,172.50,,CB,85.050003,33.810001,9.949418,1
154,2022090800,56,43298.0,Leonard Floyd,1,2022-09-08 20:24:05.200000,54.0,LA,left,91.99,...,4.76,0.20,155.94,136.01,,DE,85.050003,33.810001,6.940062,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1406325,2022091200,3826,41369.0,Cameron Fleming,53,2022-09-12 23:05:58.200000,73.0,DEN,left,62.80,...,1.91,0.31,239.26,237.05,,T,55.570000,10.570000,16.741652,6
1407226,2022091200,3826,52526.0,Albert Okwuegbunam,53,2022-09-12 23:05:58.200000,85.0,DEN,left,53.39,...,2.76,0.20,123.00,169.13,,TE,55.570000,10.570000,3.806153,7
1407067,2022091200,3826,52454.0,K.J. Hamler,53,2022-09-12 23:05:58.200000,1.0,DEN,left,38.27,...,0.77,0.23,299.89,117.69,,WR,55.570000,10.570000,38.755547,8
1406961,2022091200,3826,52423.0,Jerry Jeudy,53,2022-09-12 23:05:58.200000,10.0,DEN,left,56.61,...,0.11,0.09,210.56,197.88,,WR,55.570000,10.570000,25.671075,9


In [757]:
# Replace the shuffled row labels left over from sorting with a fresh 0..n-1 index.
pre_final = pre_final.reset_index(drop=True)

In [758]:
#pre_final.sort_values(['gameId','playId', 'frameId', 'position', 'jerseyNumber'])

In [759]:
#pre_final.sort_values(by = ['gameId', 'playId', 'frameId', 'position', 'jerseyNumber'])[['gameId', 'playId', 'frameId', 'position', 'jerseyNumber', 'distance_to_ball']]

In [760]:
# Per-play lookup of which team has possession and which team defends.
team_cols = ['gameId', 'playId', 'possessionTeam', 'defensiveTeam']
plays_temp = plays.loc[:, team_cols]

# Independent, freshly indexed copy of the player rows.
pre_final_2 = pre_final.copy().reset_index(drop=True)

# Inner join: tracking rows whose (gameId, playId) is absent from `plays` are dropped.
sides = pd.merge(pre_final_2, plays_temp, how='inner', on=['gameId', 'playId'])

In [761]:
# Inspect the player rows with possessionTeam / defensiveTeam attached.
sides

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,...,o,dir,event,position,x_ball,y_ball,distance_to_ball,number,possessionTeam,defensiveTeam
0,2022090800,56,42392.0,Mitch Morse,1,2022-09-08 20:24:05.200000,60.0,BUF,left,88.25,...,244.21,30.29,,C,85.050003,33.810001,5.995406,0,BUF,LA
1,2022090800,56,48512.0,Ryan Bates,1,2022-09-08 20:24:05.200000,71.0,BUF,left,90.22,...,103.24,115.97,,C,85.050003,33.810001,5.451768,1,BUF,LA
2,2022090800,56,42816.0,Troy Hill,1,2022-09-08 20:24:05.200000,2.0,LA,left,72.34,...,338.33,257.23,,CB,85.050003,33.810001,28.940357,0,BUF,LA
3,2022090800,56,43294.0,Jalen Ramsey,1,2022-09-08 20:24:05.200000,5.0,LA,left,77.95,...,139.54,172.50,,CB,85.050003,33.810001,9.949418,1,BUF,LA
4,2022090800,56,43298.0,Leonard Floyd,1,2022-09-08 20:24:05.200000,54.0,LA,left,91.99,...,155.94,136.01,,DE,85.050003,33.810001,6.940062,2,BUF,LA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1346241,2022091200,3826,41369.0,Cameron Fleming,53,2022-09-12 23:05:58.200000,73.0,DEN,left,62.80,...,239.26,237.05,,T,55.570000,10.570000,16.741652,6,DEN,SEA
1346242,2022091200,3826,52526.0,Albert Okwuegbunam,53,2022-09-12 23:05:58.200000,85.0,DEN,left,53.39,...,123.00,169.13,,TE,55.570000,10.570000,3.806153,7,DEN,SEA
1346243,2022091200,3826,52454.0,K.J. Hamler,53,2022-09-12 23:05:58.200000,1.0,DEN,left,38.27,...,299.89,117.69,,WR,55.570000,10.570000,38.755547,8,DEN,SEA
1346244,2022091200,3826,52423.0,Jerry Jeudy,53,2022-09-12 23:05:58.200000,10.0,DEN,left,56.61,...,210.56,197.88,,WR,55.570000,10.570000,25.671075,9,DEN,SEA


In [762]:
# Rows whose club has possession on the play. The per-row measurements get an "o_"
# prefix (so the column `o` becomes `o_o`); all other columns keep their names.
offense_renames = {
    col: f'o_{col}'
    for col in ('x', 'y', 's', 'a', 'dis', 'o', 'dir', 'distance_to_ball')
}
offense = sides.loc[sides['club'] == sides['possessionTeam']].rename(columns=offense_renames)

In [765]:
# Rows whose club is the defending team on the play. The per-row measurements get a
# "d_" prefix (so the column `o` becomes `d_o`); all other columns keep their names.
defense_renames = {
    col: f'd_{col}'
    for col in ('x', 'y', 's', 'a', 'dis', 'o', 'dir', 'distance_to_ball')
}
defense = sides.loc[sides['club'] == sides['defensiveTeam']].rename(columns=defense_renames)

In [766]:
# Widen the offense rows: one row per (gameId, playId, frameId) and one column per
# (measurement, slot `number`). Rows sharing a frame and slot are averaged.
offense_measures = ['o_x', 'o_y', 'o_s', 'o_a', 'o_dis', 'o_o', 'o_dir', 'o_distance_to_ball']
o_table = offense.pivot_table(
    index=['gameId', 'playId', 'frameId'],
    columns=['number'],
    values=offense_measures,
    aggfunc="mean",
)

# Collapse the two-level column labels into flat names such as "o_x_0".
o_table.columns = o_table.columns.map(lambda label: f'{label[0]}_{label[1]}')

# Turn the three key index levels back into ordinary columns.
o_table = o_table.reset_index()

In [768]:
# Widen the defense rows: one row per (gameId, playId, frameId) and one column per
# (measurement, slot `number`). Rows sharing a frame and slot are averaged.
defense_measures = ['d_x', 'd_y', 'd_s', 'd_a', 'd_dis', 'd_o', 'd_dir', 'd_distance_to_ball']
d_table = defense.pivot_table(
    index=['gameId', 'playId', 'frameId'],
    columns=['number'],
    values=defense_measures,
    aggfunc="mean",
)

# Collapse the two-level column labels into flat names such as "d_x_0".
d_table.columns = d_table.columns.map(lambda label: f'{label[0]}_{label[1]}')

# Turn the three key index levels back into ordinary columns.
d_table = d_table.reset_index()

In [769]:
# Inspect the wide defense table (one row per frame).
d_table

Unnamed: 0,gameId,playId,frameId,d_a_0,d_a_1,d_a_2,d_a_3,d_a_4,d_a_5,d_a_6,...,d_y_1,d_y_2,d_y_3,d_y_4,d_y_5,d_y_6,d_y_7,d_y_8,d_y_9,d_y_10
0,2022090800,56,1,0.91,3.00,4.76,2.88,2.34,4.56,3.81,...,40.78,33.78,25.47,31.61,21.15,27.53,33.64,28.50,37.91,20.33
1,2022090800,56,2,1.68,2.29,4.35,2.68,2.21,6.07,4.43,...,40.21,33.65,25.54,31.32,21.26,27.69,33.57,28.69,37.60,20.33
2,2022090800,56,3,2.26,1.78,4.02,2.91,2.27,5.64,4.20,...,39.63,33.52,25.59,30.98,21.44,27.88,33.50,28.90,37.25,20.34
3,2022090800,56,4,2.78,1.18,3.57,2.82,2.39,4.80,3.87,...,39.03,33.39,25.63,30.65,21.67,28.13,33.44,29.14,36.87,20.39
4,2022090800,56,5,4.00,0.97,2.85,2.58,2.52,3.90,3.19,...,38.44,33.26,25.65,30.30,21.95,28.42,33.39,29.38,36.48,20.45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61188,2022091200,3826,49,2.49,0.90,2.86,4.79,0.05,1.43,0.30,...,5.32,16.23,5.39,24.40,4.44,7.96,9.65,10.72,7.82,16.14
61189,2022091200,3826,50,2.35,0.41,2.91,4.58,0.03,1.14,0.75,...,5.53,15.69,5.08,24.36,4.59,8.01,9.81,10.54,7.90,15.67
61190,2022091200,3826,51,1.98,0.32,3.09,4.28,0.10,0.75,0.90,...,5.74,15.19,4.82,24.31,4.75,8.05,9.92,10.37,7.95,15.23
61191,2022091200,3826,52,1.69,0.63,3.21,3.73,0.18,0.43,1.22,...,5.95,14.73,4.59,24.26,4.91,8.08,10.01,10.23,7.98,14.81


In [770]:
# Put offense and defense features side by side for each frame. The join is inner,
# so only frames present in both tables are kept.
final_input_df = pd.merge(
    o_table,
    d_table,
    how='inner',
    on=['gameId', 'playId', 'frameId'],
)

In [771]:
# List the distinct games that made it into the final table.
final_input_df['gameId'].unique()

array([2022090800, 2022091100, 2022091101, 2022091102, 2022091103,
       2022091104, 2022091105, 2022091106, 2022091107, 2022091108,
       2022091109, 2022091110, 2022091111, 2022091112, 2022091113,
       2022091200])

In [772]:
# Inspect the combined offense + defense feature table (one row per frame).
final_input_df

Unnamed: 0,gameId,playId,frameId,o_a_0,o_a_1,o_a_2,o_a_3,o_a_4,o_a_5,o_a_6,...,d_y_1,d_y_2,d_y_3,d_y_4,d_y_5,d_y_6,d_y_7,d_y_8,d_y_9,d_y_10
0,2022090800,56,1,1.97,2.46,1.15,1.00,0.33,2.47,2.57,...,40.78,33.78,25.47,31.61,21.15,27.53,33.64,28.50,37.91,20.33
1,2022090800,56,2,1.82,2.53,0.61,1.22,0.66,2.59,2.75,...,40.21,33.65,25.54,31.32,21.26,27.69,33.57,28.69,37.60,20.33
2,2022090800,56,3,1.60,2.73,0.49,1.24,0.92,2.56,2.90,...,39.63,33.52,25.59,30.98,21.44,27.88,33.50,28.90,37.25,20.34
3,2022090800,56,4,1.26,2.74,0.89,1.03,1.37,2.40,2.92,...,39.03,33.39,25.63,30.65,21.67,28.13,33.44,29.14,36.87,20.39
4,2022090800,56,5,0.99,2.94,1.24,0.76,1.63,2.50,2.75,...,38.44,33.26,25.65,30.30,21.95,28.42,33.39,29.38,36.48,20.45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61188,2022091200,3826,49,1.46,1.23,1.40,0.23,1.58,0.20,1.22,...,5.32,16.23,5.39,24.40,4.44,7.96,9.65,10.72,7.82,16.14
61189,2022091200,3826,50,1.45,1.22,1.70,0.23,2.44,0.27,1.41,...,5.53,15.69,5.08,24.36,4.59,8.01,9.81,10.54,7.90,15.67
61190,2022091200,3826,51,1.56,1.29,1.93,0.28,2.67,0.34,1.57,...,5.74,15.19,4.82,24.31,4.75,8.05,9.92,10.37,7.95,15.23
61191,2022091200,3826,52,1.55,1.41,2.06,0.53,3.12,0.58,1.74,...,5.95,14.73,4.59,24.26,4.91,8.08,10.01,10.23,7.98,14.81
