In [1]:
import pandas as pd
import csv
import numpy as np
from datetime import datetime
import dateutil.parser
from IPython.display import display

pd.options.display.max_columns = None
# pd.set_option('display.max_rows', None)


In [2]:
#read in data, including data for all weeks
games = pd.read_csv("games.csv")
weeks_dict = dict()
for i in range(1, 2):
    headers = [*pd.read_csv(f'week{i}_cut_by_frame_clean.csv', nrows=1)]
    weeks_dict[i] = pd.read_csv(f'week{i}_cut_by_frame_clean.csv', usecols =[i for i in headers if i != 'time'])
    #reduce memory usage
    weeks_dict[i]['gameId'] = weeks_dict[i]['gameId'].fillna(0).astype('uint32', errors = 'ignore')
    weeks_dict[i]['nflId'] = weeks_dict[i]['nflId'].fillna(0).astype('uint32', errors = 'ignore')
    weeks_dict[i]['playId'] = weeks_dict[i]['playId'].fillna(0).astype('uint16', errors = 'ignore')
    weeks_dict[i]['frameId'] = weeks_dict[i]['frameId'].fillna(0).astype('uint8', errors = 'ignore')
    weeks_dict[i]['jerseyNumber'] = weeks_dict[i]['jerseyNumber'].fillna(-1).astype('int8', errors = 'ignore')
    weeks_dict[i]['team'] = weeks_dict[i]['team'].astype('category', errors = 'ignore')
    weeks_dict[i]['playDirection'] = weeks_dict[i]['playDirection'].astype('category', errors = 'ignore')
    weeks_dict[i][weeks_dict[i].select_dtypes(np.float64).columns] = weeks_dict[i].select_dtypes(np.float64).astype(np.float32)
    weeks_dict[i]['event'] = weeks_dict[i]['event'].astype('category', errors = 'ignore')
    print(weeks_dict[i].info(memory_usage='deep'))

#nan values in binary response variables become 0
pff = pd.read_csv("pffScoutingData.csv")
pff['gameId'] = pff['gameId'].fillna(-1).astype('uint32', errors = 'ignore')
pff['nflId'] = pff['nflId'].fillna(-1).astype('uint32', errors = 'ignore')
pff['playId'] = pff['playId'].fillna(-1).astype('uint16', errors = 'ignore')
pff['pff_role'] = pff['pff_role'].astype('category')
pff['pff_positionLinedUp'] = pff['pff_positionLinedUp'].astype('category')
pff['pff_hit'] = pff['pff_hit'].fillna(0).astype('int8')
pff['pff_hurry'] = pff['pff_hurry'].fillna(0).astype('int8')
pff['pff_sack'] = pff['pff_sack'].fillna(0).astype('int8')
pff['pff_beatenByDefender'] = pff['pff_beatenByDefender'].fillna(0).astype('int8')
pff['pff_hitAllowed'] = pff['pff_hitAllowed'].fillna(0).astype('int8')
pff['pff_hurryAllowed'] = pff['pff_hurryAllowed'].fillna(0).astype('int8')
pff['pff_sackAllowed'] = pff['pff_sackAllowed'].fillna(0).astype('int8')
pff['pff_nflIdBlockedPlayer'] = pff['pff_nflIdBlockedPlayer'].fillna(0).astype('uint32')
pff['pff_blockType'] = pff['pff_blockType'].astype('category')
pff['pff_backFieldBlock'] = pff['pff_backFieldBlock'].fillna(0).astype('int8')

players = pd.read_csv("players.csv")
play = pd.read_csv("plays.csv")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 843295 entries, 0 to 843294
Data columns (total 15 columns):
 #   Column         Non-Null Count   Dtype   
---  ------         --------------   -----   
 0   gameId         843295 non-null  uint32  
 1   playId         843295 non-null  uint16  
 2   nflId          843295 non-null  uint32  
 3   jerseyNumber   843295 non-null  int8    
 4   team           843295 non-null  category
 5   playDirection  843295 non-null  category
 6   x              843295 non-null  float32 
 7   y              843295 non-null  float32 
 8   s              843295 non-null  float32 
 9   a              843295 non-null  float32 
 10  dis            843295 non-null  float32 
 11  o              806630 non-null  float32 
 12  dir            806630 non-null  float32 
 13  event          843295 non-null  category
 14  frameId        843295 non-null  uint8   
dtypes: category(3), float32(7), int8(1), uint16(1), uint32(2), uint8(1)
memory usage: 34.6 MB
None


In [3]:
week1 = weeks_dict[1]
players_dict = players[['nflId', 'weight']].set_index('nflId').to_dict()
# create dictionary to find player weight given their nfl id
def find_player_weight(nflID):
    if nflID in players_dict['weight'].keys():
        return players_dict['weight'][nflID]
    return None


In [4]:
#calculates force
week1['weight'] = week1['nflId'].apply(lambda x : find_player_weight(x))
week1['force'] = week1['a'] * week1['weight']
#add pff positions to week1
week1_merged = week1.merge(pff, how = 'left', on = ['gameId', 'playId', 'nflId'])

In [5]:
# calculate horizontal and vertical force vectors
# we add 90 degrees since 0 degrees is facing towards sideline
import math
from math import sin, cos, radians
def calculate_x_force(force, direction):
    return force * math.cos(radians(direction + 90))
def calculate_y_force(force, direction):
    return force * math.sin(radians(direction + 90))

week1_merged['x_force'] = week1_merged.apply(lambda x : calculate_x_force(x['force'], x['dir']), axis = 1)
week1_merged['y_force'] = week1_merged.apply(lambda x : calculate_y_force(x['force'], x['dir']), axis = 1)


In [6]:
week1_defense = week1_merged[week1_merged['pff_role'] == 'Pass Rush']
week1_offense = week1_merged[week1_merged['pff_role'] == 'Pass Block']
week1_defense[['gameId', 'playId', 'nflId', 'frameId', 'dir', 'force', 'x_force', 'y_force', 'pff_role']].to_csv(r"C:\Users\Harrison\Documents\cmu2021-2022\36490\week1_defense_force.csv", index = False)
week1_offense[['gameId', 'playId', 'nflId', 'frameId', 'dir', 'force', 'x_force', 'y_force', 'pff_role']].to_csv(r"C:\Users\Harrison\Documents\cmu2021-2022\36490\week1_offense_force.csv", index = False)

In [7]:
display(week1_merged)

Unnamed: 0,gameId,playId,nflId,jerseyNumber,team,playDirection,x,y,s,a,dis,o,dir,event,frameId,weight,force,pff_role,pff_positionLinedUp,pff_hit,pff_hurry,pff_sack,pff_beatenByDefender,pff_hitAllowed,pff_hurryAllowed,pff_sackAllowed,pff_nflIdBlockedPlayer,pff_blockType,pff_backFieldBlock,x_force,y_force
0,2021090900,97,25511,12,TB,right,37.639999,24.260000,0.35,0.53,0.05,144.419998,282.720001,ball_snap,1,225.0,119.249994,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,116.323329,26.257267
1,2021090900,97,25511,12,TB,right,37.560001,24.260000,0.54,1.05,0.08,137.490005,272.950012,,2,225.0,236.249989,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,235.936914,12.158530
2,2021090900,97,25511,12,TB,right,37.470001,24.250000,0.80,1.85,0.09,131.949997,267.489990,,3,225.0,416.250005,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,415.850649,-18.229221
3,2021090900,97,25511,12,TB,right,37.380001,24.240000,0.99,2.03,0.09,129.850006,263.480011,,4,225.0,456.749994,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,453.795872,-51.863888
4,2021090900,97,25511,12,TB,right,37.270000,24.230000,1.19,1.82,0.11,123.790001,263.769989,,5,225.0,409.500012,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,407.081611,-44.438969
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
843290,2021091300,4845,0,-1,football,left,50.840000,24.730000,4.30,0.46,0.46,,,,20,,,,,,,,,,,,,,,,
843291,2021091300,4845,0,-1,football,left,51.250000,24.830000,4.25,1.15,0.43,,,,21,,,,,,,,,,,,,,,,
843292,2021091300,4845,0,-1,football,left,51.669998,24.930000,4.14,1.83,0.42,,,,22,,,,,,,,,,,,,,,,
843293,2021091300,4845,0,-1,football,left,52.060001,25.030001,3.96,1.93,0.40,,,,23,,,,,,,,,,,,,,,,


In [8]:

# week1_defense_grouped = week1_defense.groupby(['gameId', 'playId', 'frameId'])
# week1_offense_grouped = week1_offense.groupby(['gameId', 'playId', 'frameId'])

def calculate_net_forces(df):
    """
    @brief calculate next x and y force
    @param[in] df is either defense or offense df (filtered dfs that contain either pass rushers or pass blockers)
    @param[out] df of columns of x force and y force
    """
    df_defense = df[df['pff_role'] == 'Pass Rush']
    df_offense = df[df['pff_role'] == 'Pass Block']
    
    offense_grouped = df_offense.groupby(['gameId', 'playId', 'frameId'])
    defense_grouped = df_defense.groupby(['gameId', 'playId', 'frameId'])
    
    net_x_force_offense = offense_grouped['x_force'].sum()
    net_y_force_offense = offense_grouped['y_force'].sum()
    
    net_x_force_defense = defense_grouped['x_force'].sum()
    net_y_force_defense = defense_grouped['y_force'].sum()
    
#     display(net_x_force_defense)
#     display(net_x_force_offense)
#     print(net_x_force_defense.index)
#     print(net_x_force_offense.index)
    x_idx = net_x_force_defense.index.union(net_x_force_offense.index)
    y_idx = net_y_force_defense.index.union(net_y_force_offense.index)
    
    total_x_force = net_x_force_defense.reindex(x_idx, fill_value = 0) + net_x_force_offense.reindex(x_idx, fill_value = 0)
    total_y_force = net_y_force_defense.reindex(y_idx, fill_value = 0) + net_y_force_offense.reindex(y_idx, fill_value = 0)
    
    
    total_x_force_df = total_x_force.to_frame(name = 'net_x_force').reset_index().set_index(['gameId', 'playId', 'frameId'])
    total_y_force_df = total_y_force.to_frame(name = 'net_y_force').reset_index().set_index(['gameId', 'playId', 'frameId'])
    
    total_force_df = pd.concat([total_x_force_df, total_y_force_df], axis = 1).reset_index()
    display(total_force_df)
    return total_force_df

calculate_net_forces(week1_merged)


Unnamed: 0,gameId,playId,frameId,net_x_force,net_y_force
0,2021090900,97,1,3813.010989,980.210665
1,2021090900,97,2,5381.368450,952.640458
2,2021090900,97,3,6375.027117,831.071763
3,2021090900,97,4,6101.939308,948.761460
4,2021090900,97,5,5524.171820,801.139818
...,...,...,...,...,...
36660,2021091300,4845,20,-4056.204611,584.906955
36661,2021091300,4845,21,-4247.679106,335.169653
36662,2021091300,4845,22,-4714.921088,229.592025
36663,2021091300,4845,23,-4924.005090,60.923283


Unnamed: 0,gameId,playId,frameId,net_x_force,net_y_force
0,2021090900,97,1,3813.010989,980.210665
1,2021090900,97,2,5381.368450,952.640458
2,2021090900,97,3,6375.027117,831.071763
3,2021090900,97,4,6101.939308,948.761460
4,2021090900,97,5,5524.171820,801.139818
...,...,...,...,...,...
36660,2021091300,4845,20,-4056.204611,584.906955
36661,2021091300,4845,21,-4247.679106,335.169653
36662,2021091300,4845,22,-4714.921088,229.592025
36663,2021091300,4845,23,-4924.005090,60.923283


In [9]:
"""
Find y locations of tackles at start of frame, and partition dataframes

"""
offense_grouped = week1_offense.groupby(['gameId', 'playId', 'frameId'])
# lg_y, rg_y = find_tackles_y(week1_merged)
# print(lg_y, rg_y)


def find_lt_y(df, simple = False):
    if simple:
        return df[(df['pff_positionLinedUp'] == 'LT') & (df['frameId'] == 1)]['y'].values[0]
    lg_y = df[df['pff_positionLinedUp'] == 'LT']['y']
    return lg_y.values[0]

def find_rt_y(df, simple = False):
    if simple:
        return df[(df['pff_positionLinedUp'] == 'LT') & (df['frameId'] == 1)]['y'].values[0]
    rg_y = df[df['pff_positionLinedUp'] == 'RT']['y']
    return rg_y.values[0]

def create_indicator(df):
    """
    @brief
    @param[in]
    @param[out]
    """
    # first two merges take the longest ~30-60 seconds
    to_merge1 = df.groupby(['gameId', 'playId', 'frameId']).apply(lambda x : find_lt_y(x)).reset_index().rename(columns = {0 : 'lt_y'})
    to_merge2 = df.groupby(['gameId', 'playId', 'frameId']).apply(lambda x : find_rt_y(x)).reset_index().rename(columns = {0 : 'rt_y'})
    
    new_df = pd.merge(df, to_merge1, how="left",on = ["frameId","playId","gameId"])
    new_df = pd.merge(new_df, to_merge2, how="left",on = ["frameId","playId","gameId"])
    
    new_df['max_y'] = new_df[['lt_y', 'rt_y']].max(axis = 1)
    new_df['min_y'] = new_df[['lt_y', 'rt_y']].min(axis = 1)
    new_df['indicator'] = np.where(new_df['y'] < new_df['min_y'], -1, 
                                         np.where((new_df['min_y'] <= new_df['y']) & (new_df['y'] <= new_df['max_y']), 0, 
                                                 1))
    return new_df

new_df = create_indicator(week1_merged)
    
    

In [10]:
display(new_df)

Unnamed: 0,gameId,playId,nflId,jerseyNumber,team,playDirection,x,y,s,a,dis,o,dir,event,frameId,weight,force,pff_role,pff_positionLinedUp,pff_hit,pff_hurry,pff_sack,pff_beatenByDefender,pff_hitAllowed,pff_hurryAllowed,pff_sackAllowed,pff_nflIdBlockedPlayer,pff_blockType,pff_backFieldBlock,x_force,y_force,lt_y,rt_y,max_y,min_y,indicator
0,2021090900,97,25511,12,TB,right,37.639999,24.260000,0.35,0.53,0.05,144.419998,282.720001,ball_snap,1,225.0,119.249994,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,116.323329,26.257267,26.920000,21.309999,26.920000,21.309999,0
1,2021090900,97,25511,12,TB,right,37.560001,24.260000,0.54,1.05,0.08,137.490005,272.950012,,2,225.0,236.249989,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,235.936914,12.158530,26.950001,21.270000,26.950001,21.270000,0
2,2021090900,97,25511,12,TB,right,37.470001,24.250000,0.80,1.85,0.09,131.949997,267.489990,,3,225.0,416.250005,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,415.850649,-18.229221,27.010000,21.209999,27.010000,21.209999,0
3,2021090900,97,25511,12,TB,right,37.380001,24.240000,0.99,2.03,0.09,129.850006,263.480011,,4,225.0,456.749994,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,453.795872,-51.863888,27.110001,21.139999,27.110001,21.139999,0
4,2021090900,97,25511,12,TB,right,37.270000,24.230000,1.19,1.82,0.11,123.790001,263.769989,,5,225.0,409.500012,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,407.081611,-44.438969,27.200001,21.049999,27.200001,21.049999,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
843290,2021091300,4845,0,-1,football,left,50.840000,24.730000,4.30,0.46,0.46,,,,20,,,,,,,,,,,,,,,,,21.040001,25.020000,25.020000,21.040001,0
843291,2021091300,4845,0,-1,football,left,51.250000,24.830000,4.25,1.15,0.43,,,,21,,,,,,,,,,,,,,,,,21.090000,24.809999,24.809999,21.090000,1
843292,2021091300,4845,0,-1,football,left,51.669998,24.930000,4.14,1.83,0.42,,,,22,,,,,,,,,,,,,,,,,21.170000,24.600000,24.600000,21.170000,1
843293,2021091300,4845,0,-1,football,left,52.060001,25.030001,3.96,1.93,0.40,,,,23,,,,,,,,,,,,,,,,,21.250000,24.400000,24.400000,21.250000,1


In [11]:
test = new_df.groupby(['indicator']).apply(calculate_net_forces)

Unnamed: 0,gameId,playId,frameId,net_x_force,net_y_force
0,2021090900,97,1,225.829433,76.702765
1,2021090900,97,2,222.811755,47.888761
2,2021090900,97,3,223.532487,27.763155
3,2021090900,97,4,206.406079,34.985072
4,2021090900,97,5,340.844242,99.605315
...,...,...,...,...,...
32491,2021091300,4845,20,-143.132826,98.851377
32492,2021091300,4845,21,-67.925403,48.218562
32493,2021091300,4845,22,-91.187775,66.227488
32494,2021091300,4845,23,-182.732102,136.105115


Unnamed: 0,gameId,playId,frameId,net_x_force,net_y_force
0,2021090900,97,1,3011.310241,1139.103075
1,2021090900,97,2,4179.353807,1208.053041
2,2021090900,97,3,5065.204141,1239.113916
3,2021090900,97,4,5127.171588,1244.208433
4,2021090900,97,5,4395.666783,1093.389779
...,...,...,...,...,...
36660,2021091300,4845,20,-2900.586634,267.046953
36661,2021091300,4845,21,-3131.608748,240.612911
36662,2021091300,4845,22,-3338.164103,148.210615
36663,2021091300,4845,23,-3142.577825,-133.516249


Unnamed: 0,gameId,playId,frameId,net_x_force,net_y_force
0,2021090900,97,1,575.871315,-235.595175
1,2021090900,97,2,979.202888,-303.301343
2,2021090900,97,3,1086.290489,-435.805309
3,2021090900,97,4,768.361641,-330.432045
4,2021090900,97,5,787.660795,-391.855275
...,...,...,...,...,...
32063,2021091300,4845,20,-1012.485151,219.008625
32064,2021091300,4845,21,-1048.144954,46.338180
32065,2021091300,4845,22,-1285.569210,15.153922
32066,2021091300,4845,23,-1598.695163,58.334418


In [12]:
test1 = test.reset_index(level = 1, drop = True)
test1 = test1.reset_index()
display(test1)
groups = []
for n, g in test1.groupby('indicator'):
    groups.append(g)
#     df = start.merge(g, how = 'cross', on = [['gameId', 'playId', 'frameId']])
#     display(df)
#     break
display(groups[0])
display(groups[1])
display(groups[2])
df = groups[0].merge(groups[1], how = 'outer', on = ['gameId', 'playId', 'frameId'])
df = df.merge(groups[2], how = 'outer', on = ['gameId', 'playId', 'frameId'])
display(df)

df = df.rename(columns = {'net_x_force' : 'net_x_force_top', 'net_y_force' : 'net_y_force_top',
                         'net_x_force_x' : 'net_x_force_bottom', 'net_y_force_x' : 'net_y_force_bottom',
                         'net_x_force_y' : 'net_x_force_middle', 'net_y_force' : 'net_y_force_middle'})
display(df)
df = df.drop(columns = ['indicator', 'indicator_x', 'indicator_y'])
display(df)

Unnamed: 0,indicator,gameId,playId,frameId,net_x_force,net_y_force
0,-1,2021090900,97,1,225.829433,76.702765
1,-1,2021090900,97,2,222.811755,47.888761
2,-1,2021090900,97,3,223.532487,27.763155
3,-1,2021090900,97,4,206.406079,34.985072
4,-1,2021090900,97,5,340.844242,99.605315
...,...,...,...,...,...,...
101224,1,2021091300,4845,20,-1012.485151,219.008625
101225,1,2021091300,4845,21,-1048.144954,46.338180
101226,1,2021091300,4845,22,-1285.569210,15.153922
101227,1,2021091300,4845,23,-1598.695163,58.334418


Unnamed: 0,indicator,gameId,playId,frameId,net_x_force,net_y_force
0,-1,2021090900,97,1,225.829433,76.702765
1,-1,2021090900,97,2,222.811755,47.888761
2,-1,2021090900,97,3,223.532487,27.763155
3,-1,2021090900,97,4,206.406079,34.985072
4,-1,2021090900,97,5,340.844242,99.605315
...,...,...,...,...,...,...
32491,-1,2021091300,4845,20,-143.132826,98.851377
32492,-1,2021091300,4845,21,-67.925403,48.218562
32493,-1,2021091300,4845,22,-91.187775,66.227488
32494,-1,2021091300,4845,23,-182.732102,136.105115


Unnamed: 0,indicator,gameId,playId,frameId,net_x_force,net_y_force
32496,0,2021090900,97,1,3011.310241,1139.103075
32497,0,2021090900,97,2,4179.353807,1208.053041
32498,0,2021090900,97,3,5065.204141,1239.113916
32499,0,2021090900,97,4,5127.171588,1244.208433
32500,0,2021090900,97,5,4395.666783,1093.389779
...,...,...,...,...,...,...
69156,0,2021091300,4845,20,-2900.586634,267.046953
69157,0,2021091300,4845,21,-3131.608748,240.612911
69158,0,2021091300,4845,22,-3338.164103,148.210615
69159,0,2021091300,4845,23,-3142.577825,-133.516249


Unnamed: 0,indicator,gameId,playId,frameId,net_x_force,net_y_force
69161,1,2021090900,97,1,575.871315,-235.595175
69162,1,2021090900,97,2,979.202888,-303.301343
69163,1,2021090900,97,3,1086.290489,-435.805309
69164,1,2021090900,97,4,768.361641,-330.432045
69165,1,2021090900,97,5,787.660795,-391.855275
...,...,...,...,...,...,...
101224,1,2021091300,4845,20,-1012.485151,219.008625
101225,1,2021091300,4845,21,-1048.144954,46.338180
101226,1,2021091300,4845,22,-1285.569210,15.153922
101227,1,2021091300,4845,23,-1598.695163,58.334418


Unnamed: 0,indicator_x,gameId,playId,frameId,net_x_force_x,net_y_force_x,indicator_y,net_x_force_y,net_y_force_y,indicator,net_x_force,net_y_force
0,-1.0,2021090900,97,1,225.829433,76.702765,0,3011.310241,1139.103075,1.0,575.871315,-235.595175
1,-1.0,2021090900,97,2,222.811755,47.888761,0,4179.353807,1208.053041,1.0,979.202888,-303.301343
2,-1.0,2021090900,97,3,223.532487,27.763155,0,5065.204141,1239.113916,1.0,1086.290489,-435.805309
3,-1.0,2021090900,97,4,206.406079,34.985072,0,5127.171588,1244.208433,1.0,768.361641,-330.432045
4,-1.0,2021090900,97,5,340.844242,99.605315,0,4395.666783,1093.389779,1.0,787.660795,-391.855275
...,...,...,...,...,...,...,...,...,...,...,...,...
36660,,2021091300,4765,30,,,0,1118.764123,898.635577,1.0,51.306529,-1780.221784
36661,,2021091300,4765,31,,,0,929.065577,647.767261,1.0,-583.874277,-1512.301239
36662,,2021091300,4765,32,,,0,789.571179,130.987269,1.0,-1134.759790,-893.942712
36663,,2021091300,4765,33,,,0,439.674322,-862.376669,1.0,-1975.420449,440.543421


Unnamed: 0,indicator_x,gameId,playId,frameId,net_x_force_bottom,net_y_force_bottom,indicator_y,net_x_force_middle,net_y_force_y,indicator,net_x_force_top,net_y_force_middle
0,-1.0,2021090900,97,1,225.829433,76.702765,0,3011.310241,1139.103075,1.0,575.871315,-235.595175
1,-1.0,2021090900,97,2,222.811755,47.888761,0,4179.353807,1208.053041,1.0,979.202888,-303.301343
2,-1.0,2021090900,97,3,223.532487,27.763155,0,5065.204141,1239.113916,1.0,1086.290489,-435.805309
3,-1.0,2021090900,97,4,206.406079,34.985072,0,5127.171588,1244.208433,1.0,768.361641,-330.432045
4,-1.0,2021090900,97,5,340.844242,99.605315,0,4395.666783,1093.389779,1.0,787.660795,-391.855275
...,...,...,...,...,...,...,...,...,...,...,...,...
36660,,2021091300,4765,30,,,0,1118.764123,898.635577,1.0,51.306529,-1780.221784
36661,,2021091300,4765,31,,,0,929.065577,647.767261,1.0,-583.874277,-1512.301239
36662,,2021091300,4765,32,,,0,789.571179,130.987269,1.0,-1134.759790,-893.942712
36663,,2021091300,4765,33,,,0,439.674322,-862.376669,1.0,-1975.420449,440.543421


Unnamed: 0,gameId,playId,frameId,net_x_force_bottom,net_y_force_bottom,net_x_force_middle,net_y_force_y,net_x_force_top,net_y_force_middle
0,2021090900,97,1,225.829433,76.702765,3011.310241,1139.103075,575.871315,-235.595175
1,2021090900,97,2,222.811755,47.888761,4179.353807,1208.053041,979.202888,-303.301343
2,2021090900,97,3,223.532487,27.763155,5065.204141,1239.113916,1086.290489,-435.805309
3,2021090900,97,4,206.406079,34.985072,5127.171588,1244.208433,768.361641,-330.432045
4,2021090900,97,5,340.844242,99.605315,4395.666783,1093.389779,787.660795,-391.855275
...,...,...,...,...,...,...,...,...,...
36660,2021091300,4765,30,,,1118.764123,898.635577,51.306529,-1780.221784
36661,2021091300,4765,31,,,929.065577,647.767261,-583.874277,-1512.301239
36662,2021091300,4765,32,,,789.571179,130.987269,-1134.759790,-893.942712
36663,2021091300,4765,33,,,439.674322,-862.376669,-1975.420449,440.543421


In [19]:
def animate_play(tracking_df, play_df,players,pffScoutingData, gameId,playId):
    selected_play_df = play_df[(play_df.playId==playId)&(play_df.gameId==gameId)].copy()
    
    tracking_players_df = pd.merge(tracking_df,players,how="left",on = "nflId")
    tracking_players_df = pd.merge(tracking_players_df,pffScoutingData,how="left",on = ["nflId","playId","gameId"])
    selected_tracking_df = tracking_players_df[(tracking_players_df.playId==playId)&(tracking_players_df.gameId==gameId)].copy()

    sorted_frame_list = selected_tracking_df.frameId.unique()
    sorted_frame_list.sort()

    # get play General information 
    line_of_scrimmage = selected_play_df.absoluteYardlineNumber.values[0]
    first_down_marker = line_of_scrimmage + selected_play_df.yardsToGo.values[0]
    down = selected_play_df.down.values[0]
    quarter = selected_play_df.quarter.values[0]
    gameClock = selected_play_df.gameClock.values[0]
    playDescription = selected_play_df.playDescription.values[0]
    # Handle case where we have a really long Play Description and want to split it into two lines
    if len(playDescription.split(" "))>15 and len(playDescription)>115:
        playDescription = " ".join(playDescription.split(" ")[0:16]) + "<br>" + " ".join(playDescription.split(" ")[16:])

    # initialize plotly start and stop buttons for animation
    updatemenus_dict = [
        {
            "buttons": [
                {
                    "args": [None, {"frame": {"duration": 100, "redraw": False},
                                "fromcurrent": True, "transition": {"duration": 0}}],
                    "label": "Play",
                    "method": "animate"
                },
                {
                    "args": [[None], {"frame": {"duration": 0, "redraw": False},
                                      "mode": "immediate",
                                      "transition": {"duration": 0}}],
                    "label": "Pause",
                    "method": "animate"
                }
            ],
            "direction": "left",
            "pad": {"r": 10, "t": 87},
            "showactive": False,
            "type": "buttons",
            "x": 0.1,
            "xanchor": "right",
            "y": 0,
            "yanchor": "top"
        }
    ]
    # initialize plotly slider to show frame position in animation
    sliders_dict = {
        "active": 0,
        "yanchor": "top",
        "xanchor": "left",
        "currentvalue": {
            "font": {"size": 20},
            "prefix": "Frame:",
            "visible": True,
            "xanchor": "right"
        },
        "transition": {"duration": 300, "easing": "cubic-in-out"},
        "pad": {"b": 10, "t": 50},
        "len": 0.9,
        "x": 0.1,
        "y": 0,
        "steps": []
    }


    frames = []
    for frameId in sorted_frame_list:
        data = []
        # Add Numbers to Field 
        data.append(
            go.Scatter(
                x=np.arange(20,110,10), 
                y=[5]*len(np.arange(20,110,10)),
                mode='text',
                text=list(map(str,list(np.arange(20, 61, 10)-10)+list(np.arange(40, 9, -10)))),
                textfont_size = 30,
                textfont_family = "Courier New, monospace",
                textfont_color = "#ffffff",
                showlegend=False,
                hoverinfo='none'
            )
        )
        data.append(
            go.Scatter(
                x=np.arange(20,110,10), 
                y=[53.5-5]*len(np.arange(20,110,10)),
                mode='text',
                text=list(map(str,list(np.arange(20, 61, 10)-10)+list(np.arange(40, 9, -10)))),
                textfont_size = 30,
                textfont_family = "Courier New, monospace",
                textfont_color = "#ffffff",
                showlegend=False,
                hoverinfo='none'
            )
        )
        # Add line of scrimage 
        data.append(
            go.Scatter(
                x=[line_of_scrimmage,line_of_scrimmage], 
                y=[0,53.5],
                line_dash='dash',
                line_color='blue',
                showlegend=False,
                hoverinfo='none'
            )
        )
        # Add First down line 
        data.append(
            go.Scatter(
                x=[first_down_marker,first_down_marker], 
                y=[0,53.5],
                line_dash='dash',
                line_color='yellow',
                showlegend=False,
                hoverinfo='none'
            )
        )
        # Plot Players
        for team in selected_tracking_df.team.unique():
            plot_df = selected_tracking_df[(selected_tracking_df.team==team)&(selected_tracking_df.frameId==frameId)].copy()
            if team != "football":
                hover_text_array=[]
                for nflId in plot_df.nflId:
                    selected_player_df = plot_df[plot_df.nflId==nflId]
                    hover_text_array.append("nflId:{}<br>displayName:{}<br>Position:{}<br>Role:{}".format(selected_player_df["nflId"].values[0],
                                                                                      selected_player_df["displayName"].values[0],
                                                                                      selected_player_df["pff_positionLinedUp"].values[0],
                                                                                      selected_player_df["pff_role"].values[0]))
                data.append(go.Scatter(x=plot_df["x"], y=plot_df["y"],mode = 'markers',marker_color=colors[team],name=team,hovertext=hover_text_array,hoverinfo="text"))
                #appending arrows here
                data.append(go.Scatter(x=plot_df["x"], y = plot_df["y"], mode = 'lines', marker_color="red"))
            else:
                data.append(go.Scatter(x=plot_df["x"], y=plot_df["y"],mode = 'markers',marker_color=colors[team],name=team,hoverinfo='none'))
        # add frame to slider
        slider_step = {"args": [
            [frameId],
            {"frame": {"duration": 100, "redraw": False},
             "mode": "immediate",
             "transition": {"duration": 0}}
        ],
            "label": str(frameId),
            "method": "animate"}
        sliders_dict["steps"].append(slider_step)
        frames.append(go.Frame(data=data, name=str(frameId)))

    scale=10
    layout = go.Layout(
        autosize=False,
        width=120*scale,
        height=60*scale,
        xaxis=dict(range=[0, 120], autorange=False, tickmode='array',tickvals=np.arange(10, 111, 5).tolist(),showticklabels=False),
        yaxis=dict(range=[0, 53.3], autorange=False,showgrid=False,showticklabels=False),

        plot_bgcolor='#00B140',
        # Create title and add play description at the bottom of the chart for better visual appeal
        title=f"GameId: {gameId}, PlayId: {playId}<br>{gameClock} {quarter}Q"+"<br>"*19+f"{playDescription}",
        updatemenus=updatemenus_dict,
        sliders = [sliders_dict]
    )

    fig = go.Figure(
        data=frames[0]["data"],
        layout= layout,
        frames=frames[1:]
    )
    # Create First Down Markers 
    for y_val in [0,53]:
        fig.add_annotation(
                x=first_down_marker,
                y=y_val,
                text=str(down),
                showarrow=False,
                font=dict(
                    family="Courier New, monospace",
                    size=16,
                    color="black"
                    ),
                align="center",
                bordercolor="black",
                borderwidth=2,
                borderpad=4,
                bgcolor="#ff7f0e",
                opacity=1
                )

    return fig

In [20]:
import plotly.graph_objects as go
colors = {
    'ARI':"#97233F", 
    'ATL':"#A71930", 
    'BAL':'#241773', 
    'BUF':"#00338D", 
    'CAR':"#0085CA", 
    'CHI':"#C83803", 
    'CIN':"#FB4F14", 
    'CLE':"#311D00", 
    'DAL':'#003594',
    'DEN':"#FB4F14", 
    'DET':"#0076B6", 
    'GB':"#203731", 
    'HOU':"#03202F", 
    'IND':"#002C5F", 
    'JAX':"#9F792C", 
    'KC':"#E31837", 
    'LA':"#003594", 
    'LAC':"#0080C6", 
    'LV':"#000000",
    'MIA':"#008E97", 
    'MIN':"#4F2683", 
    'NE':"#002244", 
    'NO':"#D3BC8D", 
    'NYG':"#0B2265", 
    'NYJ':"#125740", 
    'PHI':"#004C54", 
    'PIT':"#FFB612", 
    'SEA':"#69BE28", 
    'SF':"#AA0000",
    'TB':'#D50A0A', 
    'TEN':"#4B92DB", 
    'WAS':"#5A1414", 
    'football':'#CBB67C'
}

# input was originally weeks1_dict
animate_play(weeks_dict[1], play,players,pff, 2021091300,4845).show()

In [15]:
display(pff.head())

Unnamed: 0,gameId,playId,nflId,pff_role,pff_positionLinedUp,pff_hit,pff_hurry,pff_sack,pff_beatenByDefender,pff_hitAllowed,pff_hurryAllowed,pff_sackAllowed,pff_nflIdBlockedPlayer,pff_blockType,pff_backFieldBlock
0,2021090900,97,25511,Pass,QB,0,0,0,0,0,0,0,0,,0
1,2021090900,97,35481,Pass Route,TE-L,0,0,0,0,0,0,0,0,,0
2,2021090900,97,35634,Pass Route,LWR,0,0,0,0,0,0,0,0,,0
3,2021090900,97,39985,Pass Route,HB-R,0,0,0,0,0,0,0,0,,0
4,2021090900,97,40151,Pass Block,C,0,0,0,0,0,0,0,44955,SW,0


In [16]:
#consider partitioning the field in 6ths, consider force in those regions. maybe consider the coverage as well
# weigh by summing up the inverses, 1/
# multiply this "avg" by the number of players on offense or defense
# random forest
# gradient boosted trees
# consider making a prediciton for a specific frame
# consider using frame id as categorical variable
# a decision rule could be if frame 1, no hit, no sack, no hurry 
# probability increases as frame increases
# xgboost
# how does model perform over time?
# try a gam model, get some linear terms, smoothing terms, partial response terms
# autocorrelation, between frames? how would we capture that
# ELO with football players?