In [18]:
import pandas as pd
import csv
import numpy as np
from datetime import datetime
import dateutil.parser
from IPython.display import display

pd.options.display.max_columns = None
# pd.set_option('display.max_rows', None)


In [19]:
#read in data, including data for all weeks
games = pd.read_csv("games.csv")
weeks_dict = dict()
for i in range(1, 2):
    headers = [*pd.read_csv(f'week{i}.csv', nrows=1)]
    weeks_dict[i] = pd.read_csv(f'week{i}.csv', usecols =[i for i in headers if i != 'time'])
    #reduce memory usage
    weeks_dict[i]['gameId'] = weeks_dict[i]['gameId'].fillna(0).astype('uint32', errors = 'ignore')
    weeks_dict[i]['nflId'] = weeks_dict[i]['nflId'].fillna(0).astype('uint32', errors = 'ignore')
    weeks_dict[i]['playId'] = weeks_dict[i]['playId'].fillna(0).astype('uint16', errors = 'ignore')
    weeks_dict[i]['frameId'] = weeks_dict[i]['frameId'].fillna(0).astype('uint8', errors = 'ignore')
    weeks_dict[i]['jerseyNumber'] = weeks_dict[i]['jerseyNumber'].fillna(-1).astype('int8', errors = 'ignore')
    weeks_dict[i]['team'] = weeks_dict[i]['team'].astype('category', errors = 'ignore')
    weeks_dict[i]['playDirection'] = weeks_dict[i]['playDirection'].astype('category', errors = 'ignore')
    weeks_dict[i][weeks_dict[i].select_dtypes(np.float64).columns] = weeks_dict[i].select_dtypes(np.float64).astype(np.float32)
    weeks_dict[i]['event'] = weeks_dict[i]['event'].astype('category', errors = 'ignore')
    print(weeks_dict[i].info(memory_usage='deep'))

#nan values in binary response variables become 0
pff = pd.read_csv("pffScoutingData.csv")
pff['gameId'] = pff['gameId'].fillna(-1).astype('uint32', errors = 'ignore')
pff['nflId'] = pff['nflId'].fillna(-1).astype('uint32', errors = 'ignore')
pff['playId'] = pff['playId'].fillna(-1).astype('uint16', errors = 'ignore')
pff['pff_role'] = pff['pff_role'].astype('category')
pff['pff_positionLinedUp'] = pff['pff_positionLinedUp'].astype('category')
pff['pff_hit'] = pff['pff_hit'].fillna(0).astype('int8')
pff['pff_hurry'] = pff['pff_hurry'].fillna(0).astype('int8')
pff['pff_sack'] = pff['pff_sack'].fillna(0).astype('int8')
pff['pff_beatenByDefender'] = pff['pff_beatenByDefender'].fillna(0).astype('int8')
pff['pff_hitAllowed'] = pff['pff_hitAllowed'].fillna(0).astype('int8')
pff['pff_hurryAllowed'] = pff['pff_hurryAllowed'].fillna(0).astype('int8')
pff['pff_sackAllowed'] = pff['pff_sackAllowed'].fillna(0).astype('int8')
pff['pff_nflIdBlockedPlayer'] = pff['pff_nflIdBlockedPlayer'].fillna(0).astype('uint32')
pff['pff_blockType'] = pff['pff_blockType'].astype('category')
pff['pff_backFieldBlock'] = pff['pff_backFieldBlock'].fillna(0).astype('int8')

players = pd.read_csv("players.csv")
play = pd.read_csv("plays.csv")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1118122 entries, 0 to 1118121
Data columns (total 15 columns):
 #   Column         Non-Null Count    Dtype   
---  ------         --------------    -----   
 0   gameId         1118122 non-null  uint32  
 1   playId         1118122 non-null  uint16  
 2   nflId          1118122 non-null  uint32  
 3   frameId        1118122 non-null  uint8   
 4   jerseyNumber   1118122 non-null  int8    
 5   team           1118122 non-null  category
 6   playDirection  1118122 non-null  category
 7   x              1118122 non-null  float32 
 8   y              1118122 non-null  float32 
 9   s              1118122 non-null  float32 
 10  a              1118122 non-null  float32 
 11  dis            1118122 non-null  float32 
 12  o              1069508 non-null  float32 
 13  dir            1069508 non-null  float32 
 14  event          1118122 non-null  category
dtypes: category(3), float32(7), int8(1), uint16(1), uint32(2), uint8(1)
memory usage: 4

In [20]:
week1 = weeks_dict[1]
players_dict = players[['nflId', 'weight']].set_index('nflId').to_dict()
# create dictionary to find player weight given their nfl id
def find_player_weight(nflID):
    if nflID in players_dict['weight'].keys():
        return players_dict['weight'][nflID]
    return None


In [21]:
#calculates force
week1['weight'] = week1['nflId'].apply(lambda x : find_player_weight(x))
week1['force'] = week1['a'] * week1['weight']
#add pff positions to week1
week1_merged = week1.merge(pff, how = 'left', on = ['gameId', 'playId', 'nflId'])

In [22]:
# calculate horizontal and vertical force vectors
# we add 90 degrees since 0 degrees is facing towards sideline
import math
from math import sin, cos, radians
def calculate_x_force(force, direction):
    return force * math.cos(radians(direction + 90))
def calculate_y_force(force, direction):
    return force * math.sin(radians(direction + 90))

week1_merged['x_force'] = week1_merged.apply(lambda x : calculate_x_force(x['force'], x['dir']), axis = 1)
week1_merged['y_force'] = week1_merged.apply(lambda x : calculate_y_force(x['force'], x['dir']), axis = 1)


In [23]:
week1_defense = week1_merged[week1_merged['pff_role'] == 'Pass Rush']
week1_offense = week1_merged[week1_merged['pff_role'] == 'Pass Block']
# week1_defense[['gameId', 'playId', 'nflId', 'frameId', 'dir', 'force', 'x_force', 'y_force', 'pff_role']].to_csv(r"C:\Users\Harrison\Documents\cmu2021-2022\36490\week1_defense_force.csv", index = False)
# week1_offense[['gameId', 'playId', 'nflId', 'frameId', 'dir', 'force', 'x_force', 'y_force', 'pff_role']].to_csv(r"C:\Users\Harrison\Documents\cmu2021-2022\36490\week1_offense_force.csv", index = False)

In [24]:
display(week1_merged)
# week1_merged[(week1_merged['pff_role'] == "Pass Block") | (week1_merged['pff_role'] == "Pass Rush")].to_csv("week1_merged.csv", index = False)

Unnamed: 0,gameId,playId,nflId,frameId,jerseyNumber,team,playDirection,x,y,s,a,dis,o,dir,event,weight,force,pff_role,pff_positionLinedUp,pff_hit,pff_hurry,pff_sack,pff_beatenByDefender,pff_hitAllowed,pff_hurryAllowed,pff_sackAllowed,pff_nflIdBlockedPlayer,pff_blockType,pff_backFieldBlock,x_force,y_force
0,2021090900,97,25511,1,12,TB,right,37.770000,24.219999,0.290000,0.30,0.03,165.160004,84.989998,,225.0,67.500003,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,-67.242117,5.894751
1,2021090900,97,25511,2,12,TB,right,37.779999,24.219999,0.230000,0.11,0.02,164.330002,92.870003,,225.0,24.750000,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,-24.718956,-1.239234
2,2021090900,97,25511,3,12,TB,right,37.779999,24.240000,0.160000,0.10,0.01,160.240005,68.550003,,225.0,22.500000,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,-20.941584,8.228005
3,2021090900,97,25511,4,12,TB,right,37.730000,24.250000,0.150000,0.24,0.06,152.130005,296.850006,,225.0,53.999999,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,48.178365,24.389445
4,2021090900,97,25511,5,12,TB,right,37.689999,24.260000,0.250000,0.18,0.04,148.330002,287.549988,,225.0,40.500002,Pass,QB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,38.614898,12.212280
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1118117,2021091300,4845,0,30,-1,football,left,52.779999,25.230000,3.580000,1.95,0.37,,,pass_forward,,,,,,,,,,,,,,,,
1118118,2021091300,4845,0,31,-1,football,left,50.310001,26.459999,17.160000,0.25,2.77,,,,,,,,,,,,,,,,,,,
1118119,2021091300,4845,0,32,-1,football,left,48.660000,26.990000,17.100000,1.05,1.73,,,,,,,,,,,,,,,,,,,
1118120,2021091300,4845,0,33,-1,football,left,47.040001,27.530001,16.980000,1.67,1.71,,,,,,,,,,,,,,,,,,,


In [25]:

# week1_defense_grouped = week1_defense.groupby(['gameId', 'playId', 'frameId'])
# week1_offense_grouped = week1_offense.groupby(['gameId', 'playId', 'frameId'])

def calculate_net_forces(df):
    """
    @brief calculate next x and y force
    @param[in] df is either defense or offense df (filtered dfs that contain either pass rushers or pass blockers)
    @param[out] df of columns of x force and y force
    """
    df_defense = df[df['pff_role'] == 'Pass Rush']
    df_offense = df[df['pff_role'] == 'Pass Block']
    
    offense_grouped = df_offense.groupby(['gameId', 'playId', 'frameId'])
    defense_grouped = df_defense.groupby(['gameId', 'playId', 'frameId'])
    
    net_x_force_offense = offense_grouped['x_force'].sum()
    net_y_force_offense = offense_grouped['y_force'].sum()
    
    net_x_force_defense = defense_grouped['x_force'].sum()
    net_y_force_defense = defense_grouped['y_force'].sum()
    
#     display(net_x_force_defense)
#     display(net_x_force_offense)
#     print(net_x_force_defense.index)
#     print(net_x_force_offense.index)
    x_idx = net_x_force_defense.index.union(net_x_force_offense.index)
    y_idx = net_y_force_defense.index.union(net_y_force_offense.index)
    
    total_x_force = net_x_force_defense.reindex(x_idx, fill_value = 0) + net_x_force_offense.reindex(x_idx, fill_value = 0)
    total_y_force = net_y_force_defense.reindex(y_idx, fill_value = 0) + net_y_force_offense.reindex(y_idx, fill_value = 0)
    
    
    total_x_force_df = total_x_force.to_frame(name = 'net_x_force').reset_index().set_index(['gameId', 'playId', 'frameId'])
    total_y_force_df = total_y_force.to_frame(name = 'net_y_force').reset_index().set_index(['gameId', 'playId', 'frameId'])
    
    total_force_df = pd.concat([total_x_force_df, total_y_force_df], axis = 1).reset_index()
    display(total_force_df)
    return total_force_df

calculate_net_forces(week1_merged)


Unnamed: 0,gameId,playId,frameId,net_x_force,net_y_force
0,2021090900,97,1,94.399384,227.780334
1,2021090900,97,2,94.768498,171.833646
2,2021090900,97,3,166.835390,93.834388
3,2021090900,97,4,1091.172216,197.718974
4,2021090900,97,5,2343.160508,590.872988
...,...,...,...,...,...
48609,2021091300,4845,30,-4915.398328,108.360923
48610,2021091300,4845,31,-4610.876686,266.981323
48611,2021091300,4845,32,-4342.330493,520.941860
48612,2021091300,4845,33,-4168.087180,760.582992


Unnamed: 0,gameId,playId,frameId,net_x_force,net_y_force
0,2021090900,97,1,94.399384,227.780334
1,2021090900,97,2,94.768498,171.833646
2,2021090900,97,3,166.835390,93.834388
3,2021090900,97,4,1091.172216,197.718974
4,2021090900,97,5,2343.160508,590.872988
...,...,...,...,...,...
48609,2021091300,4845,30,-4915.398328,108.360923
48610,2021091300,4845,31,-4610.876686,266.981323
48611,2021091300,4845,32,-4342.330493,520.941860
48612,2021091300,4845,33,-4168.087180,760.582992


In [27]:
"""
Find y locations of tackles at start of frame, and partition dataframes

"""
offense_grouped = week1_offense.groupby(['gameId', 'playId', 'frameId'])
# lg_y, rg_y = find_tackles_y(week1_merged)
# print(lg_y, rg_y)


def find_lt_y(df, simple = False):

    lg_y = df[df['pff_positionLinedUp'] == 'LT']['y']
    return lg_y.values[0]

def find_rt_y(df, simple = False):
    # if simple:
    #     return df[(df['pff_positionLinedUp'] == 'LT') & (df['frameId'] == 1)]['y'].values[0]
    rg_y = df[df['pff_positionLinedUp'] == 'RT']['y']
    return rg_y.values[0]

def create_indicator(df, simple = False):
    """
    @brief
    @param[in]
    @param[out]
    """
    if simple:
        to_merge1 = df.groupby(['gameId', 'playId']).apply(lambda x : find_lt_y(x, True)).reset_index().rename(columns = {0 : 'lt_y'})
        to_merge2 = df.groupby(['gameId', 'playId']).apply(lambda x : find_rt_y(x, True)).reset_index().rename(columns = {0 : 'rt_y'})
        new_df = pd.merge(df, to_merge1, how="left",on = ["playId","gameId"])
        new_df = pd.merge(new_df, to_merge2, how="left",on = ["playId","gameId"])
    else:
    # first two merges take the longest ~30-60 seconds
        to_merge1 = df.groupby(['gameId', 'playId', 'frameId']).apply(lambda x : find_lt_y(x, True)).reset_index().rename(columns = {0 : 'lt_y'})
        to_merge2 = df.groupby(['gameId', 'playId', 'frameId']).apply(lambda x : find_rt_y(x, True)).reset_index().rename(columns = {0 : 'rt_y'})
    
        new_df = pd.merge(df, to_merge1, how="left",on = ["frameId","playId","gameId"])
        new_df = pd.merge(new_df, to_merge2, how="left",on = ["frameId","playId","gameId"])
    
    new_df['max_y'] = new_df[['lt_y', 'rt_y']].max(axis = 1)
    new_df['min_y'] = new_df[['lt_y', 'rt_y']].min(axis = 1)
    new_df['indicator'] = np.where(new_df['y'] < new_df['min_y'], -1, 
                                         np.where((new_df['min_y'] <= new_df['y']) & (new_df['y'] <= new_df['max_y']), 0, 
                                                 1))
    return new_df

new_df = create_indicator(week1_merged, simple = True)
    
    

In [None]:
display(new_df)

In [29]:
test = new_df.groupby(['indicator']).apply(calculate_net_forces)

Unnamed: 0,gameId,playId,frameId,net_x_force,net_y_force
0,2021090900,97,1,92.944313,225.388103
1,2021090900,97,2,93.165687,169.537781
2,2021090900,97,3,60.928573,111.658176
3,2021090900,97,4,164.840788,17.867436
4,2021090900,97,5,465.906475,-77.330013
...,...,...,...,...,...
40439,2021091300,4845,26,-67.925403,48.218562
40440,2021091300,4845,27,-91.187775,66.227488
40441,2021091300,4845,28,-182.732102,136.105115
40442,2021091300,4845,29,-287.052391,221.539360


Unnamed: 0,gameId,playId,frameId,net_x_force,net_y_force
0,2021090900,97,1,1.455071,2.392231
1,2021090900,97,2,1.602810,2.295865
2,2021090900,97,3,105.906817,-17.823788
3,2021090900,97,4,883.961408,212.598808
4,2021090900,97,5,761.824479,577.008782
...,...,...,...,...,...
48427,2021091300,4845,30,-3393.126367,-241.856576
48428,2021091300,4845,31,-4097.112457,15.086708
48429,2021091300,4845,32,-4073.356712,63.379289
48430,2021091300,4845,33,-4059.896050,125.964459


Unnamed: 0,gameId,playId,frameId,net_x_force,net_y_force
0,2021090900,97,1,0.000000,0.000000
1,2021090900,97,2,0.000000,0.000000
2,2021090900,97,3,0.000000,0.000000
3,2021090900,97,4,42.370021,-32.747270
4,2021090900,97,5,1115.429553,91.194218
...,...,...,...,...,...
40492,2021091300,4845,30,-1169.129609,70.019130
40493,2021091300,4845,31,-513.764229,251.894616
40494,2021091300,4845,32,-268.973781,457.562571
40495,2021091300,4845,33,-108.191130,634.618533


In [30]:
test1 = test.reset_index(level = 1, drop = True)
test1 = test1.reset_index()
display(test1)
groups = []
for n, g in test1.groupby('indicator'):
    groups.append(g)
#     df = start.merge(g, how = 'cross', on = [['gameId', 'playId', 'frameId']])
#     display(df)
#     break
display(groups[0])
display(groups[1])
display(groups[2])
df = groups[0].merge(groups[1], how = 'outer', on = ['gameId', 'playId', 'frameId'])
df = df.merge(groups[2], how = 'outer', on = ['gameId', 'playId', 'frameId'])
display(df)

df = df.rename(columns = {'net_x_force' : 'net_x_force_top', 'net_y_force' : 'net_y_force_top',
                         'net_x_force_x' : 'net_x_force_bottom', 'net_y_force_x' : 'net_y_force_bottom',
                         'net_x_force_y' : 'net_x_force_middle', 'net_y_force' : 'net_y_force_middle'})
display(df)
df = df.drop(columns = ['indicator', 'indicator_x', 'indicator_y'])
display(df)

Unnamed: 0,indicator,gameId,playId,frameId,net_x_force,net_y_force
0,-1,2021090900,97,1,92.944313,225.388103
1,-1,2021090900,97,2,93.165687,169.537781
2,-1,2021090900,97,3,60.928573,111.658176
3,-1,2021090900,97,4,164.840788,17.867436
4,-1,2021090900,97,5,465.906475,-77.330013
...,...,...,...,...,...,...
129368,1,2021091300,4845,30,-1169.129609,70.019130
129369,1,2021091300,4845,31,-513.764229,251.894616
129370,1,2021091300,4845,32,-268.973781,457.562571
129371,1,2021091300,4845,33,-108.191130,634.618533


Unnamed: 0,indicator,gameId,playId,frameId,net_x_force,net_y_force
0,-1,2021090900,97,1,92.944313,225.388103
1,-1,2021090900,97,2,93.165687,169.537781
2,-1,2021090900,97,3,60.928573,111.658176
3,-1,2021090900,97,4,164.840788,17.867436
4,-1,2021090900,97,5,465.906475,-77.330013
...,...,...,...,...,...,...
40439,-1,2021091300,4845,26,-67.925403,48.218562
40440,-1,2021091300,4845,27,-91.187775,66.227488
40441,-1,2021091300,4845,28,-182.732102,136.105115
40442,-1,2021091300,4845,29,-287.052391,221.539360


Unnamed: 0,indicator,gameId,playId,frameId,net_x_force,net_y_force
40444,0,2021090900,97,1,1.455071,2.392231
40445,0,2021090900,97,2,1.602810,2.295865
40446,0,2021090900,97,3,105.906817,-17.823788
40447,0,2021090900,97,4,883.961408,212.598808
40448,0,2021090900,97,5,761.824479,577.008782
...,...,...,...,...,...,...
88871,0,2021091300,4845,30,-3393.126367,-241.856576
88872,0,2021091300,4845,31,-4097.112457,15.086708
88873,0,2021091300,4845,32,-4073.356712,63.379289
88874,0,2021091300,4845,33,-4059.896050,125.964459


Unnamed: 0,indicator,gameId,playId,frameId,net_x_force,net_y_force
88876,1,2021090900,97,1,0.000000,0.000000
88877,1,2021090900,97,2,0.000000,0.000000
88878,1,2021090900,97,3,0.000000,0.000000
88879,1,2021090900,97,4,42.370021,-32.747270
88880,1,2021090900,97,5,1115.429553,91.194218
...,...,...,...,...,...,...
129368,1,2021091300,4845,30,-1169.129609,70.019130
129369,1,2021091300,4845,31,-513.764229,251.894616
129370,1,2021091300,4845,32,-268.973781,457.562571
129371,1,2021091300,4845,33,-108.191130,634.618533


Unnamed: 0,indicator_x,gameId,playId,frameId,net_x_force_x,net_y_force_x,indicator_y,net_x_force_y,net_y_force_y,indicator,net_x_force,net_y_force
0,-1.0,2021090900,97,1,92.944313,225.388103,0.0,1.455071,2.392231,1.0,0.000000,0.000000
1,-1.0,2021090900,97,2,93.165687,169.537781,0.0,1.602810,2.295865,1.0,0.000000,0.000000
2,-1.0,2021090900,97,3,60.928573,111.658176,0.0,105.906817,-17.823788,1.0,0.000000,0.000000
3,-1.0,2021090900,97,4,164.840788,17.867436,0.0,883.961408,212.598808,1.0,42.370021,-32.747270
4,-1.0,2021090900,97,5,465.906475,-77.330013,0.0,761.824479,577.008782,1.0,1115.429553,91.194218
...,...,...,...,...,...,...,...,...,...,...,...,...
48609,,2021091210,146,43,,,,,,1.0,-1934.920416,2761.192229
48610,,2021091210,146,44,,,,,,1.0,-2059.174077,2692.567357
48611,,2021091210,146,45,,,,,,1.0,-2141.343067,2622.170778
48612,,2021091210,146,46,,,,,,1.0,-2191.702721,2732.787281


Unnamed: 0,indicator_x,gameId,playId,frameId,net_x_force_bottom,net_y_force_bottom,indicator_y,net_x_force_middle,net_y_force_y,indicator,net_x_force_top,net_y_force_middle
0,-1.0,2021090900,97,1,92.944313,225.388103,0.0,1.455071,2.392231,1.0,0.000000,0.000000
1,-1.0,2021090900,97,2,93.165687,169.537781,0.0,1.602810,2.295865,1.0,0.000000,0.000000
2,-1.0,2021090900,97,3,60.928573,111.658176,0.0,105.906817,-17.823788,1.0,0.000000,0.000000
3,-1.0,2021090900,97,4,164.840788,17.867436,0.0,883.961408,212.598808,1.0,42.370021,-32.747270
4,-1.0,2021090900,97,5,465.906475,-77.330013,0.0,761.824479,577.008782,1.0,1115.429553,91.194218
...,...,...,...,...,...,...,...,...,...,...,...,...
48609,,2021091210,146,43,,,,,,1.0,-1934.920416,2761.192229
48610,,2021091210,146,44,,,,,,1.0,-2059.174077,2692.567357
48611,,2021091210,146,45,,,,,,1.0,-2141.343067,2622.170778
48612,,2021091210,146,46,,,,,,1.0,-2191.702721,2732.787281


Unnamed: 0,gameId,playId,frameId,net_x_force_bottom,net_y_force_bottom,net_x_force_middle,net_y_force_y,net_x_force_top,net_y_force_middle
0,2021090900,97,1,92.944313,225.388103,1.455071,2.392231,0.000000,0.000000
1,2021090900,97,2,93.165687,169.537781,1.602810,2.295865,0.000000,0.000000
2,2021090900,97,3,60.928573,111.658176,105.906817,-17.823788,0.000000,0.000000
3,2021090900,97,4,164.840788,17.867436,883.961408,212.598808,42.370021,-32.747270
4,2021090900,97,5,465.906475,-77.330013,761.824479,577.008782,1115.429553,91.194218
...,...,...,...,...,...,...,...,...,...
48609,2021091210,146,43,,,,,-1934.920416,2761.192229
48610,2021091210,146,44,,,,,-2059.174077,2692.567357
48611,2021091210,146,45,,,,,-2141.343067,2622.170778
48612,2021091210,146,46,,,,,-2191.702721,2732.787281


In [None]:
df.to_csv("partitioned_forces.csv")

In [None]:
def animate_play(tracking_df, play_df,players,pffScoutingData, gameId,playId):
    selected_play_df = play_df[(play_df.playId==playId)&(play_df.gameId==gameId)].copy()
    
    tracking_players_df = pd.merge(tracking_df,players,how="left",on = "nflId")
    tracking_players_df = pd.merge(tracking_players_df,pffScoutingData,how="left",on = ["nflId","playId","gameId"])
    selected_tracking_df = tracking_players_df[(tracking_players_df.playId==playId)&(tracking_players_df.gameId==gameId)].copy()

    sorted_frame_list = selected_tracking_df.frameId.unique()
    sorted_frame_list.sort()

    # get play General information 
    line_of_scrimmage = selected_play_df.absoluteYardlineNumber.values[0]
    first_down_marker = line_of_scrimmage + selected_play_df.yardsToGo.values[0]
    down = selected_play_df.down.values[0]
    quarter = selected_play_df.quarter.values[0]
    gameClock = selected_play_df.gameClock.values[0]
    playDescription = selected_play_df.playDescription.values[0]
    # Handle case where we have a really long Play Description and want to split it into two lines
    if len(playDescription.split(" "))>15 and len(playDescription)>115:
        playDescription = " ".join(playDescription.split(" ")[0:16]) + "<br>" + " ".join(playDescription.split(" ")[16:])

    # initialize plotly start and stop buttons for animation
    updatemenus_dict = [
        {
            "buttons": [
                {
                    "args": [None, {"frame": {"duration": 100, "redraw": False},
                                "fromcurrent": True, "transition": {"duration": 0}}],
                    "label": "Play",
                    "method": "animate"
                },
                {
                    "args": [[None], {"frame": {"duration": 0, "redraw": False},
                                      "mode": "immediate",
                                      "transition": {"duration": 0}}],
                    "label": "Pause",
                    "method": "animate"
                }
            ],
            "direction": "left",
            "pad": {"r": 10, "t": 87},
            "showactive": False,
            "type": "buttons",
            "x": 0.1,
            "xanchor": "right",
            "y": 0,
            "yanchor": "top"
        }
    ]
    # initialize plotly slider to show frame position in animation
    sliders_dict = {
        "active": 0,
        "yanchor": "top",
        "xanchor": "left",
        "currentvalue": {
            "font": {"size": 20},
            "prefix": "Frame:",
            "visible": True,
            "xanchor": "right"
        },
        "transition": {"duration": 300, "easing": "cubic-in-out"},
        "pad": {"b": 10, "t": 50},
        "len": 0.9,
        "x": 0.1,
        "y": 0,
        "steps": []
    }


    frames = []
    for frameId in sorted_frame_list:
        data = []
        # Add Numbers to Field 
        data.append(
            go.Scatter(
                x=np.arange(20,110,10), 
                y=[5]*len(np.arange(20,110,10)),
                mode='text',
                text=list(map(str,list(np.arange(20, 61, 10)-10)+list(np.arange(40, 9, -10)))),
                textfont_size = 30,
                textfont_family = "Courier New, monospace",
                textfont_color = "#ffffff",
                showlegend=False,
                hoverinfo='none'
            )
        )
        data.append(
            go.Scatter(
                x=np.arange(20,110,10), 
                y=[53.5-5]*len(np.arange(20,110,10)),
                mode='text',
                text=list(map(str,list(np.arange(20, 61, 10)-10)+list(np.arange(40, 9, -10)))),
                textfont_size = 30,
                textfont_family = "Courier New, monospace",
                textfont_color = "#ffffff",
                showlegend=False,
                hoverinfo='none'
            )
        )
        # Add line of scrimage 
        data.append(
            go.Scatter(
                x=[line_of_scrimmage,line_of_scrimmage], 
                y=[0,53.5],
                line_dash='dash',
                line_color='blue',
                showlegend=False,
                hoverinfo='none'
            )
        )
        # Add First down line 
        data.append(
            go.Scatter(
                x=[first_down_marker,first_down_marker], 
                y=[0,53.5],
                line_dash='dash',
                line_color='yellow',
                showlegend=False,
                hoverinfo='none'
            )
        )
        # Plot Players
        for team in selected_tracking_df.team.unique():
            plot_df = selected_tracking_df[(selected_tracking_df.team==team)&(selected_tracking_df.frameId==frameId)].copy()
            if team != "football":
                hover_text_array=[]
                for nflId in plot_df.nflId:
                    selected_player_df = plot_df[plot_df.nflId==nflId]
                    hover_text_array.append("nflId:{}<br>displayName:{}<br>Position:{}<br>Role:{}".format(selected_player_df["nflId"].values[0],
                                                                                      selected_player_df["displayName"].values[0],
                                                                                      selected_player_df["pff_positionLinedUp"].values[0],
                                                                                      selected_player_df["pff_role"].values[0]))
                data.append(go.Scatter(x=plot_df["x"], y=plot_df["y"],mode = 'markers',marker_color=colors[team],name=team,hovertext=hover_text_array,hoverinfo="text"))
                #appending arrows here
                data.append(go.Scatter(x=plot_df["x"], y = plot_df["y"], mode = 'lines', marker_color="red"))
            else:
                data.append(go.Scatter(x=plot_df["x"], y=plot_df["y"],mode = 'markers',marker_color=colors[team],name=team,hoverinfo='none'))
        # add frame to slider
        slider_step = {"args": [
            [frameId],
            {"frame": {"duration": 100, "redraw": False},
             "mode": "immediate",
             "transition": {"duration": 0}}
        ],
            "label": str(frameId),
            "method": "animate"}
        sliders_dict["steps"].append(slider_step)
        frames.append(go.Frame(data=data, name=str(frameId)))

    scale=10
    layout = go.Layout(
        autosize=False,
        width=120*scale,
        height=60*scale,
        xaxis=dict(range=[0, 120], autorange=False, tickmode='array',tickvals=np.arange(10, 111, 5).tolist(),showticklabels=False),
        yaxis=dict(range=[0, 53.3], autorange=False,showgrid=False,showticklabels=False),

        plot_bgcolor='#00B140',
        # Create title and add play description at the bottom of the chart for better visual appeal
        title=f"GameId: {gameId}, PlayId: {playId}<br>{gameClock} {quarter}Q"+"<br>"*19+f"{playDescription}",
        updatemenus=updatemenus_dict,
        sliders = [sliders_dict]
    )

    fig = go.Figure(
        data=frames[0]["data"],
        layout= layout,
        frames=frames[1:]
    )
    # Create First Down Markers 
    for y_val in [0,53]:
        fig.add_annotation(
                x=first_down_marker,
                y=y_val,
                text=str(down),
                showarrow=False,
                font=dict(
                    family="Courier New, monospace",
                    size=16,
                    color="black"
                    ),
                align="center",
                bordercolor="black",
                borderwidth=2,
                borderpad=4,
                bgcolor="#ff7f0e",
                opacity=1
                )

    return fig

In [None]:
import plotly.graph_objects as go
colors = {
    'ARI':"#97233F", 
    'ATL':"#A71930", 
    'BAL':'#241773', 
    'BUF':"#00338D", 
    'CAR':"#0085CA", 
    'CHI':"#C83803", 
    'CIN':"#FB4F14", 
    'CLE':"#311D00", 
    'DAL':'#003594',
    'DEN':"#FB4F14", 
    'DET':"#0076B6", 
    'GB':"#203731", 
    'HOU':"#03202F", 
    'IND':"#002C5F", 
    'JAX':"#9F792C", 
    'KC':"#E31837", 
    'LA':"#003594", 
    'LAC':"#0080C6", 
    'LV':"#000000",
    'MIA':"#008E97", 
    'MIN':"#4F2683", 
    'NE':"#002244", 
    'NO':"#D3BC8D", 
    'NYG':"#0B2265", 
    'NYJ':"#125740", 
    'PHI':"#004C54", 
    'PIT':"#FFB612", 
    'SEA':"#69BE28", 
    'SF':"#AA0000",
    'TB':'#D50A0A', 
    'TEN':"#4B92DB", 
    'WAS':"#5A1414", 
    'football':'#CBB67C'
}

# input was originally weeks1_dict
animate_play(weeks_dict[1], play,players,pff, 2021090900,97).show()

In [None]:
display(pff.head())

In [None]:
#consider partitioning the field in 6ths, consider force in those regions. maybe consider the coverage as well
# weigh by summing up the inverses, 1/
# multiply this "avg" by the number of players on offense or defense
# random forest
# gradient boosted trees
# consider making a prediciton for a specific frame
# consider using frame id as categorical variable
# a decision rule could be if frame 1, no hit, no sack, no hurry 
# probability increases as frame increases
# xgboost
# how does model perform over time?
# try a gam model, get some linear terms, smoothing terms, partial response terms
# autocorrelation, between frames? how would we capture that
# ELO with football players?