**T**ime for **I**ntended **P**ass to be **T**hrown **O**ver **E**xpected (TIPTOE)

*Still workshopping the name*

This notebook is the combination of two trained models.

The first model, named **E**xpected **L**ineman **M**otion (ELM), predicts how a offensive or defensive lineman will move given their position and other player positions on passing plays. 

The second model, named **E**xpected **T**ime **T**o **T**hrow (ET3), predicts how much longer a QB has to attempt a pass given positions, velocity, and acceleration of offensive and defensive lineman at any given moment. 

We use these two models to predict the individual contribution of a lineman's motion to the time to throw. For a given frame, we can calculated the ET3 value. We can then compare an individual's contribution by using the previous frame to compute the expected position of the individual player. Using the new position, another ET3 value is calulated. The difference in the ET3 values is then the TIPTOE value for that individual player for that given frame. This can then be repeated over all frames and a cumulative time over expected can be calculated for a play. 

In [1]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

import csv
import random
from matplotlib import pyplot as plt
import math
from tqdm.notebook import tqdm

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
#from tensorflow.keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split

import seaborn as sns 
from scipy.stats import pearsonr# This Python 3 environment comes with many helpful analytics libraries installed

In [2]:
# Import BDB 2023 Data
# Pass plays week 1-8 season 2021

games_df = pd.read_csv ('../input/nfl-big-data-bowl-2023/games.csv')
players_df = pd.read_csv ('../input/nfl-big-data-bowl-2023/players.csv')
plays_df = pd.read_csv ('../input/nfl-big-data-bowl-2023/plays.csv')
pff_df = pd.read_csv('../input/nfl-big-data-bowl-2023/pffScoutingData.csv')
weeks = []
for i in [5,6,7,8]:
    filename = '../input/nfl-big-data-bowl-2023/week'+str(i)+'.csv'
    weeks.append(pd.read_csv(filename))
weeks_df = pd.concat(weeks)

# merge scout data onto the tracking data 
weeks_df = weeks_df.merge(pff_df[['gameId', 'playId', 'nflId', 'pff_role','pff_positionLinedUp']], how='left')

# Import team color data from Lee Sharpe

teamcolors = pd.read_csv("https://raw.githubusercontent.com/leesharpe/nfldata/master/data/teamcolors.csv")

In [3]:
# LOAD PREVIOUS MODELS
model_OLM = keras.models.load_model('/kaggle/input/lineman-motion/model_OL_movement.h5')
model_DLM = keras.models.load_model('/kaggle/input/lineman-motion/model_DL_movement.h5')

model_ET3 = keras.models.load_model('/kaggle/input/ttt-final/model_TTT_passplays.h5')

2022-12-17 02:35:19.672170: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [4]:
# get all plays that resulted in a QB pass or sack
_pass = ['autoevent_passforward', 'pass_forward', 'qb_sack','qb_strip_sack']
_pass_df = weeks_df.loc[weeks_df['event'].isin(_pass)]
_gpf_pass = _pass_df.groupby(['gameId','playId','frameId']).size().reset_index().drop(columns=[0])
__gpf_pass = _gpf_pass.rename(columns={'frameId':'event_frame'})
__gpf_pass = __gpf_pass.merge(plays_df[['gameId','playId','dropBackType']], how='left')
gpf = __gpf_pass.loc[__gpf_pass['dropBackType']=='TRADITIONAL']
#gpf = __gpf_pass

In [5]:
def data_prep_ET3(one_frame):
    meta_cols = ['gameId', 'playId', 'nflId', 'frameId']
    data_cols = ['x', 'y', 's', 'o', 'dis', 'a','pff_role']
    meta_info = one_frame[meta_cols]
    play_dir = -2*int((one_frame.iloc[0].playDirection == 'left'))+1
    df = one_frame[data_cols]
    QB = one_frame.loc[one_frame['pff_role']=='Pass'][data_cols]
    df['pff_role'] = (df['pff_role'].isin(['Pass','Pass Block'])).astype(int)
    arr = df.values
    xv = play_dir*np.sin(arr[:,3]*np.pi/180)*arr[:,2]
    yv = play_dir*np.cos(arr[:,3]*np.pi/180)*arr[:,2]
    arr[:,[2,3]] = np.vstack((xv,yv)).T
    arr[:,:2] = play_dir*(arr[:,:2] - QB[['x','y']].values[0])
    dists = (arr[:,0]**2 + arr[:,1]**2)**0.5
    st_arr = np.array([[-100,-100,0,0,0,0,0.5]]*12,dtype='float')
    st_arr[:len(dists)] = arr[dists.argsort()[:12]]
    
    play_info = plays_df.loc[(plays_df['gameId']==one_frame.gameId.values[0])&
                             (plays_df['playId']==one_frame.playId.values[0])]
    
    meta_feats = [one_frame['sec_since_snap'].values[0].tolist(),
                  play_info.down.values[0],
                  play_info.yardsToGo.values[0],
                  (one_frame.pff_role == 'Pass Rush').sum(),
                  (one_frame.pff_role == 'Pass Block').sum()]
    feat_list = meta_feats + np.concatenate(st_arr).tolist()
    
    return np.array(np.reshape(feat_list,(1,89)))

def ELM2ET3(one_frame,change,nflId):
    meta_cols = ['gameId', 'playId', 'nflId', 'frameId']
    nflId_list = one_frame.nflId.values
    n = np.where(nflId_list==nflId)[0][0]
    data_cols = ['x', 'y', 's', 'o', 'dis', 'a','pff_role']
    meta_info = one_frame[meta_cols]
    play_dir = -2*int((one_frame.iloc[0].playDirection == 'left'))+1
    df = one_frame[data_cols]
    QB = one_frame.loc[one_frame['pff_role']=='Pass'][data_cols]
    df['pff_role'] = (df['pff_role'].isin(['Pass','Pass Block'])).astype(int)
    arr = df.values
    xv = play_dir*np.sin(arr[:,3]*np.pi/180)*arr[:,2]
    yv = play_dir*np.cos(arr[:,3]*np.pi/180)*arr[:,2]
    arr[:,[2,3]] = np.vstack((xv,yv)).T
    arr[:,:2] = play_dir*(arr[:,:2] - QB[['x','y']].values[0])
    arr[n,:2] = change[:2]
    arr[n,[2,3,4]] = change[[2,3,4]]
    dists = (arr[:,0]**2 + arr[:,1]**2)**0.5
    st_arr = np.array([[-100,-100,0,0,0,0,0.5]]*12,dtype='float')
    st_arr[:len(dists)] = arr[dists.argsort()[:12]]
    
    play_info = plays_df.loc[(plays_df['gameId']==one_frame.gameId.values[0])&
                             (plays_df['playId']==one_frame.playId.values[0])]
    
    meta_feats = [one_frame['sec_since_snap'].values[0].tolist(),
                  play_info.down.values[0],
                  play_info.yardsToGo.values[0],
                  (one_frame.pff_role == 'Pass Rush').sum(),
                  (one_frame.pff_role == 'Pass Block').sum()]
    feat_list = meta_feats + np.concatenate(st_arr).tolist()
    
    return feat_list

def data_prep_ELM(one_frame,next_frame):
    t_orig = np.array(model_ET3(data_prep_ET3(next_frame)))[0][0]
    meta_cols = ['gameId', 'playId', 'nflId', 'frameId']
    data_cols = ['x', 'y', 's', 'o', 'dis', 'a','pff_role']
    next_cols = ['x', 'y', 's', 'o', 'a']
    meta_info = one_frame[meta_cols]
    play_dir = -2*int((one_frame.iloc[0].playDirection == 'left'))+1
    QB = one_frame.loc[one_frame['pff_role']=='Pass'][data_cols]
    no_QB = one_frame.loc[one_frame['pff_role']!='Pass'][data_cols]
    no_QB['pff_role'] = (no_QB['pff_role'] == 'Pass Block').astype(int)
    nflId_list = one_frame.loc[one_frame['pff_role']!='Pass'].nflId.values
    no_QB_arr = no_QB.values
    xv = np.sin(no_QB_arr[:,3]*np.pi/180)*no_QB_arr[:,2]
    yv = np.cos(no_QB_arr[:,3]*np.pi/180)*no_QB_arr[:,2]
    no_QB_arr[:,[2,3]] = np.vstack((xv,yv)).T
    est_TTT = []
    c = 0
    for targ in no_QB_arr:
        t_arr = no_QB_arr.copy()
        t_arr[:,:2] = t_arr[:,:2] - targ[:2]
        dists = (t_arr[:,0]**2 + t_arr[:,1]**2)**0.5
        st_arr = t_arr[dists.argsort()[1:6]]
        st_arr[:,:4] = play_dir*st_arr[:,:4]
        indiv_feats = (QB[['x','y']].values[0] - targ[:2]).tolist()+targ[2:].tolist()
        feats = indiv_feats + np.concatenate(st_arr).tolist()
        feats_arr = np.reshape(np.array(feats),(1,42))
        if targ[6] == 1: #if OL
            change = np.array(model_OLM(feats_arr))[0]
        elif targ[6] == 0: #if DL
            change = np.array(model_DLM(feats_arr))[0]
        change[:2] = play_dir*(targ[:2] - QB[['x','y']].values[0])
        feats_ch = ELM2ET3(next_frame,change,nflId_list[c])
        feats_ch_arr = np.reshape(np.array(feats_ch),(1,89))
        est_TTT.append([meta_info.gameId.values[0],meta_info.playId.values[0],meta_info.frameId.values[0],nflId_list[c],np.array(model_ET3(feats_ch_arr))[0][0] - t_orig])
        c += 1
    return est_TTT 



In [6]:
TIPTOE_list = []
df = pd.DataFrame(columns = ['nflId', 'TTT', 'TTTOE', 'gameId', 'playId', 'frameId'])
for i, data in tqdm(gpf.iterrows(),total=len(gpf)):
    one_play = weeks_df.loc[(weeks_df['gameId']==data.gameId)&(weeks_df['playId']==data.playId)]
    one_play['sec_til_event'] = (data.event_frame - one_play.frameId)/10
    snap_fr = one_play.loc[one_play['event'].isin(['autoevent_ballsnap', 'ball_snap'])].frameId.max()
    one_play['sec_since_snap'] = (one_play.frameId - snap_fr)/10
    play_players = one_play.loc[(one_play['frameId'] >= snap_fr - 1) &
                                (one_play['pff_role'].isin(['Pass','Pass Rush','Pass Block'])) &
                                (one_play['sec_til_event'] >= 0)]
    
    if not sorted(play_players.pff_role.unique()) == sorted(['Pass','Pass Rush','Pass Block']):
            continue 
    
    down = plays_df.loc[(plays_df['gameId'] == one_play.gameId.mode()[0]) & (plays_df['playId'] == one_play.playId.mode()[0])].down.values[0]
    togo = plays_df.loc[(plays_df['gameId'] == one_play.gameId.mode()[0]) & (plays_df['playId'] == one_play.playId.mode()[0])].yardsToGo.values[0]
    norm_pos = one_play.loc[(one_play['team'] == 'football') & (one_play['frameId'] == snap_fr)][['x','y']]
    
    _target = []
    for fid in play_players.frameId.unique()[1:]:
        one_frame = play_players.loc[play_players['frameId']==fid-1]
        next_frame = play_players.loc[play_players['frameId']==fid]
        t_adj = data_prep_ELM(one_frame,next_frame)
        TIPTOE_list = TIPTOE_list + t_adj
TIPTOE = pd.DataFrame(TIPTOE_list,columns=['gameId', 'playId', 'frameId', 'nflId', 'TTTOE'])

  0%|          | 0/4444 [00:00<?, ?it/s]

In [7]:
TIPTOE.to_csv('TIPTOE_week5-8.csv',index=False)