In [113]:
import pandas as pd
import numpy as np
# Load the two inputs used below: the tracking/velocity table and the
# targeted-receiver lookup (gameId, playId, targetNflId are the merge keys
# used later in this notebook).
velo = pd.read_csv(
    filepath_or_buffer='boost_models/data/velocity_calcs_pls_final.csv',
)
target = pd.read_csv(
    filepath_or_buffer='nfl-big-data-bowl-2021-bonus/targetedReceiver.csv',
)

In [114]:
# (rows, columns) of the velocity table.
velo.shape

(13267737, 16)

In [43]:
# (rows, columns) of the targeted-receiver table.
target.shape

(19239, 3)

#### Left merge velocity dataframe with target receiver dataframe

In [115]:
# Left-join the targeted receiver onto every tracking row. A row ends up with
# a non-null `target` only when its nflId equals the targetNflId recorded for
# the same (gameId, playId); every other row gets NaN.
#
# validate='many_to_one' makes pandas raise MergeError if `target` contains
# duplicate (gameId, playId, targetNflId) keys. Without it, such duplicates
# would silently duplicate tracking rows in the result.
df = (
    velo
    .merge(
        target,
        how='left',
        left_on=['gameId', 'playId', 'nflId'],
        right_on=['gameId', 'playId', 'targetNflId'],
        validate='many_to_one',
    )
    .rename(columns={'targetNflId': 'target'})
    # 'Unnamed: 0' is an unnamed column read in from the velocity CSV.
    .drop(columns='Unnamed: 0')
)

#### Make target a dummy variable

In [116]:
# Turn the merged id column into a 0/1 label: rows that matched a targeted
# receiver hold an nflId (> 0), all other rows hold NaN (NaN > 0 is False).
#
# The comparison is against 0 rather than 1 so that re-running this cell on
# the already-converted 0/1 column leaves the labels unchanged; with `> 1`
# a second run would turn every label into 0.
df['target'] = df['target'].gt(0).astype(int)
df['target'].value_counts()

0    12327197
1      940540
Name: target, dtype: int64

In [117]:
# Preview the merged frame with its 0/1 `target` column (pandas truncates the
# display to the first and last rows).
df

Unnamed: 0,gameId,playId,nflId,position,frame,x,y,s,o,a,dir,dis,event,v_x,v_y,target
0,2018090909,387,497095,QB,11,28.41,29.83,0.28,94.89,2.43,252.97,0.02,ball_snap,-0.267722,-0.082004,0
1,2018090909,387,2495190,OLB,11,30.80,35.18,0.45,221.46,3.97,252.57,0.03,ball_snap,-0.429338,-0.134793,0
2,2018090909,387,2495281,ILB,11,33.76,26.20,0.07,274.64,0.68,262.74,0.01,ball_snap,-0.069439,-0.008846,0
3,2018090909,387,2506106,WR,11,28.14,18.48,0.43,97.02,0.92,71.01,0.04,ball_snap,0.406597,0.139923,0
4,2018090909,387,2532804,ILB,11,34.26,31.03,0.15,272.11,1.70,270.60,0.00,ball_snap,-0.149992,0.001571,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13267732,2018110406,4118,2556445,CB,74,21.26,25.87,1.56,147.97,1.93,145.37,0.17,tackle,0.886508,-1.283629,0
13267733,2018110406,4118,2558138,RB,74,29.81,11.50,0.15,282.02,1.06,53.14,0.01,tackle,0.120016,0.089979,1
13267734,2018110406,4118,2558184,SS,74,22.77,13.60,1.10,121.10,0.83,161.37,0.11,tackle,0.351401,-1.042361,0
13267735,2018110406,4118,2560830,CB,74,25.25,9.22,2.07,46.70,1.38,34.78,0.21,tackle,1.180784,1.700191,0


#### Get dummies for the non-numeric columns (e.g. position)

In [108]:
# Pick the label-like (object / category) columns and one-hot encode them.
#
# The column names are passed as `columns=`; the second positional parameter
# of pd.get_dummies is `prefix`, so passing them positionally only works when
# the list happens to line up with the columns pandas encodes by default.
# With `columns=` the prefix defaults to each column's own name, so the
# resulting names (e.g. position_QB) are unchanged.
#
# Selecting by include= (rather than excluding int64/float64) keeps other
# numeric or boolean columns out of the encoding, so re-running the cell is a
# no-op once no object/category columns remain.
#
# NOTE(review): this encodes every object/category column in df, not only
# `position` -- confirm that is intended if other string columns are present.
df_cat = df.select_dtypes(include=['object', 'category'])
df = pd.get_dummies(df, columns=list(df_cat.columns))

Unnamed: 0,gameId,playId,nflId,frame,a,o,x,y,v_x,v_y,...,position_MLB,position_NT,position_OLB,position_P,position_QB,position_RB,position_S,position_SS,position_TE,position_WR
0,2018090909,387,497095,11,2.43,94.89,28.41,29.83,-0.267722,-0.082004,...,0,0,0,0,1,0,0,0,0,0
1,2018090909,387,2495190,11,3.97,221.46,30.80,35.18,-0.429338,-0.134793,...,0,0,1,0,0,0,0,0,0,0
2,2018090909,387,2495281,11,0.68,274.64,33.76,26.20,-0.069439,-0.008846,...,0,0,0,0,0,0,0,0,0,0
3,2018090909,387,2506106,11,0.92,97.02,28.14,18.48,0.406597,0.139923,...,0,0,0,0,0,0,0,0,0,1
4,2018090909,387,2532804,11,1.70,272.11,34.26,31.03,-0.149992,0.001571,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13267732,2018110406,4118,2556445,74,1.93,147.97,21.26,25.87,0.886508,-1.283629,...,0,0,0,0,0,0,0,0,0,0
13267733,2018110406,4118,2558138,74,1.06,282.02,29.81,11.50,0.120016,0.089979,...,0,0,0,0,0,1,0,0,0,0
13267734,2018110406,4118,2558184,74,0.83,121.10,22.77,13.60,0.351401,-1.042361,...,0,0,0,0,0,0,0,1,0,0
13267735,2018110406,4118,2560830,74,1.38,46.70,25.25,9.22,1.180784,1.700191,...,0,0,0,0,0,0,0,0,0,0


In [112]:
# List the column names after encoding to check the dummy columns.
df.columns

Index(['gameId', 'playId', 'nflId', 'frame', 'a', 'o', 'x', 'y', 'v_x', 'v_y',
       'target', 'position_CB', 'position_DB', 'position_DE', 'position_DL',
       'position_DT', 'position_FB', 'position_FS', 'position_HB',
       'position_ILB', 'position_K', 'position_LB', 'position_LS',
       'position_MLB', 'position_NT', 'position_OLB', 'position_P',
       'position_QB', 'position_RB', 'position_S', 'position_SS',
       'position_TE', 'position_WR'],
      dtype='object')

In [111]:
# Persist the encoded frame for the modelling step, then confirm on stdout.
# NOTE(review): to_csv writes the row index by default, so the file gets an
# unnamed leading column that shows up as 'Unnamed: 0' when read back --
# confirm whether downstream readers rely on it before switching to
# index=False.
df.to_csv(path_or_buf='boost_models/data/data_for_model.csv')
print('Dataframe has been exported to data_for_model.csv in boost_models>data')

Dataframe has been exported to data_for_model.csv in boost_models>data


In [135]:
# Number of rows per (gameId, playId), as a flat frame with a 'count' column.
rows_per_play = df.groupby(['gameId', 'playId']).size()
df1 = rows_per_play.reset_index(name='count')
del rows_per_play  # keep the notebook namespace the same as before

Unnamed: 0,gameId,playId,count
0,2018090600,75,624
1,2018090600,146,494
2,2018090600,168,481
3,2018090600,190,806
4,2018090600,256,396
...,...,...,...
17766,2018123015,3794,585
17767,2018123015,3819,559
17768,2018123015,3969,650
17769,2018123015,4057,728
