# Analyses/Models
1. **Punt Analysis** - Explore data; Group plays by Return Type, Touchdown Rate
2. **Field Goal Analysis** - Kicker Average Speed and Prior Impulse Variability
3. **Punt Analysis** - Proximity of Nearest Defender to Punter at time of Kick
4. **Punt Analysis** - Ranking of Gunners and Jammers
4. **Field Goal Analysis/Model** - Clustering of Kickers by *Some Metric*
5. **Punt/Field Goal Analysis/Model** - Correlation between Defense Positioning at Snap and Play Outcome(?)

In [1]:
# Kaggle NFL Big Data Bowl 2022 Submission
from enum import Enum

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Read data into memory
games_df = pd.read_csv('../input/nfl-big-data-bowl-2022/games.csv')
players_df = pd.read_csv('../input/nfl-big-data-bowl-2022/players.csv')
plays_df = pd.read_csv('../input/nfl-big-data-bowl-2022/plays.csv')
pff_scouting_df = pd.read_csv('../input/nfl-big-data-bowl-2022/PFFScoutingData.csv')

# Create game plays df
game_plays_df = pd.merge(games_df, plays_df, left_on='gameId', right_on='gameId')

class Column(Enum):
    PLAY_TYPE = 'specialTeamsPlayType'
    POSITION = 'position'
    PLAY_EVENT = 'event'
    GAME_ID = 'gameId'
    PLAY_ID = 'playId'

class Position(Enum):
    KICKER = 'K'
    PUNTER = 'P'
    LONG_SNAPPER = 'LS'

class PlayType(Enum):
    FIELD_GOAL = 'Field Goal'
    PUNT = 'Punt'

class PlayEvent(Enum):
    SNAP = 'ball_snapped'
    FG_ATTEMPT = 'field_goal_attempt'
    PUNT = 'punt'

class PlayResult(Enum):
    BLOCKED = 'Blocked Kick Attempt'
    DOWNED = 'Downed'
    KICK_GOOD = 'Kick Attempt Good'
    KICK_NO_GOOD = 'Kick Attempt No Good'

plays_df.info()
plays_df.head()
pff_scouting_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19979 entries, 0 to 19978
Data columns (total 25 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   gameId                  19979 non-null  int64  
 1   playId                  19979 non-null  int64  
 2   playDescription         19979 non-null  object 
 3   quarter                 19979 non-null  int64  
 4   down                    19979 non-null  int64  
 5   yardsToGo               19979 non-null  int64  
 6   possessionTeam          19979 non-null  object 
 7   specialTeamsPlayType    19979 non-null  object 
 8   specialTeamsResult      19979 non-null  object 
 9   kickerId                19878 non-null  float64
 10  returnerId              6938 non-null   object 
 11  kickBlockerId           100 non-null    float64
 12  yardlineSide            19813 non-null  object 
 13  yardlineNumber          19979 non-null  int64  
 14  gameClock               19979 non-null

In [2]:
# Compare to tracking
# field_goals_2018_df = game_plays_df[(game_plays_df['season'] == 2018) & (plays_df['specialTeamsPlayType'] == FIELD_GOAL)]
# field_goals_2018_df = game_plays_df[(game_plays_df['season'] == 2018)]
# field_goals_2018_df['isKickGood'] = np.where(field_goals_2018_df['specialTeamsResult'] == 'Kick Attempt Good', 1, 0)
# kicker_aggs_2018_df = field_goals_2018_df.groupby('kickerId').agg({ 'isKickGood': ['sum', 'count'] })
# kicker_aggs_2018_df
# Vinatieri
# field_goals_2018_df[(field_goals_2018_df['kickerId'] == 21213.0)].sort_values(['gameId'])
# Janikowski
# field_goals_2018_df[field_goals_2018_df['kickerId'] == 25326.0].sort_values(['gameId']).head(30)
# field_goals_2018_df[(field_goals_2018_df['kickerId'] == 21213.0) & (field_goals_2018_df['specialTeamsPlayType'] == FIELD_GOAL)].sort_values(['gameId']).head(30)
# field_goals_2018_df[(playDescription['playDescription'].str.contains('Vinatieri')) & (field_goals_2018_df['specialTeamsPlayType'] == FIELD_GOAL)].sort_values(['gameId']).head(30)
plays_df[(plays_df['playDescription'].str.contains('Vinatieri')) & (plays_df['gameId'] == 2018123011)]

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,specialTeamsPlayType,specialTeamsResult,kickerId,returnerId,kickBlockerId,yardlineSide,yardlineNumber,gameClock,penaltyCodes,penaltyJerseyNumbers,penaltyYards,preSnapHomeScore,preSnapVisitorScore,passResult,kickLength,kickReturnYardage,playResult,absoluteYardlineNumber
6689,2018123011,482,"A.Vinatieri extra point is GOOD, Center-L.Rhodes, Holder-R.Sanchez.",1,0,0,IND,Extra Point,Kick Attempt Good,21213.0,,,TEN,15,06:39:00,,,,0,6,,,,0,95
6692,2018123011,1109,"A.Vinatieri extra point is GOOD, Center-L.Rhodes, Holder-R.Sanchez.",2,0,0,IND,Extra Point,Kick Attempt Good,21213.0,,,TEN,15,09:26:00,,,,0,13,,,,0,25
6702,2018123011,2474,"A.Vinatieri extra point is GOOD, Center-L.Rhodes, Holder-R.Sanchez.",3,0,0,IND,Extra Point,Kick Attempt Good,21213.0,,,TEN,15,10:46:00,,,,10,23,,,,0,25
6711,2018123011,3828,"(4:00) (Field Goal formation) A.Vinatieri 25 yard field goal is GOOD, Center-L.Rhodes, Holder-R.Sanchez.",4,4,7,IND,Field Goal,Kick Attempt Good,21213.0,,,TEN,7,04:00:00,,,,17,24,,25.0,,0,103
6713,2018123011,4134,"A.Vinatieri extra point is No Good, Wide Right, Center-L.Rhodes, Holder-R.Sanchez.",4,0,0,IND,Extra Point,Kick Attempt No Good,21213.0,,,TEN,15,02:24:00,,,,17,33,,,,0,95


In [3]:
# Distinct play types
plays_df['specialTeamsPlayType'].unique()

array(['Kickoff', 'Punt', 'Field Goal', 'Extra Point'], dtype=object)

In [4]:
# Distinct field goal play result types
# plays_df[plays_df['specialTeamsPlayType'] == FIELD_GOAL]['specialTeamsResult'].unique()
plays_df['specialTeamsResult'].unique()

array(['Touchback', 'Return', 'Kick Attempt Good', 'Fair Catch', 'Downed',
       'Muffed', 'Kick Attempt No Good', 'Out of Bounds',
       'Non-Special Teams Result', 'Blocked Kick Attempt', 'Blocked Punt',
       'Kickoff Team Recovery'], dtype=object)

In [5]:
# Distinct result types
plays_df[plays_df['specialTeamsPlayType'] == 'Punt']['specialTeamsResult'].unique()

array(['Return', 'Touchback', 'Fair Catch', 'Downed', 'Muffed',
       'Out of Bounds', 'Non-Special Teams Result', 'Blocked Punt'],
      dtype=object)

In [6]:
# Non special teams includes trick plays, fumbled snaps, etc...
plays_df[plays_df['specialTeamsResult'] == 'Non-Special Teams Result'].head()

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,specialTeamsPlayType,specialTeamsResult,kickerId,returnerId,kickBlockerId,yardlineSide,yardlineNumber,gameClock,penaltyCodes,penaltyJerseyNumbers,penaltyYards,preSnapHomeScore,preSnapVisitorScore,passResult,kickLength,kickReturnYardage,playResult,absoluteYardlineNumber
43,2018090900,2616,"(13:57) (Punt formation) C.Bojorquez FUMBLES (Aborted) at BUF 27, and recovers at BUF 14. C.Bojorquez to BUF 14 for no gain (T.Bowser).",3,4,8,BUF,Punt,Non-Special Teams Result,,,,BUF,39,13:57:00,,,,26,0,,,,-25,49
274,2018090910,2976,(Kick formation) TWO-POINT CONVERSION ATTEMPT. M.Palardy rushes left end. ATTEMPT FAILS.,4,0,0,CAR,Extra Point,Non-Special Teams Result,,,,DAL,15,14:12:00,,,,16,0,,,,0,25
944,2018092304,391,(9:17) (Punt formation) Direct snap to C.Davis. C.Grant left end to JAX 46 for no gain (L.Sims).,1,4,4,JAX,Punt,Non-Special Teams Result,,,,JAX,46,09:17:00,,,,0,0,,,,0,64
1087,2018092309,2100,"(:03) (Field Goal formation) J.Scott Aborted. H.Bradley FUMBLES at WAS 43, recovered by GB-J.Scott at GB 49. J.Scott pass incomplete short right to M.Crosby.",2,1,10,GB,Field Goal,Non-Special Teams Result,,,,WAS,43,00:03:00,,,,28,10,I,,,0,67
1251,2018092700,2608,(11:03) (Punt formation) J.Hekker pass incomplete deep right to J.Reynolds.,3,4,10,LA,Punt,Non-Special Teams Result,,,,MIN,36,11:03:00,,,,28,20,I,,,0,74


In [7]:
# Muffed punts
muffed_punt_filter = (plays_df['specialTeamsResult'] == 'Muffed') & (plays_df['specialTeamsPlayType'] == 'Punt')
plays_df[muffed_punt_filter][['playDescription', 'kickReturnYardage']].head(20)

Unnamed: 0,playDescription,kickReturnYardage
18,"(:34) M.Bosher punts 38 yards to PHI 32, Center-J.Overbaugh. T.Sullivan MUFFS catch, touched at PHI 32, RECOVERED by ATL-K.Ishmael at PHI 32. K.Ishmael to PHI 32 for no gain (L.Reynolds).",
40,"(3:01) (Punt formation) C.Bojorquez punts 52 yards to BAL 21, Center-R.Ferguson. J.Grant MUFFS catch, ball out of bounds at BAL 15.",
173,"(4:41) T.Daniel punts 47 yards to NE 17, Center-J.Weeks. R.McCarron MUFFS catch, RECOVERED by HOU-J.Bademosi at NE 16. J.Bademosi to NE 16 for no gain (J.Jones).",
237,"(:54) (Punt formation) L.Cooke punts 41 yards to NYG 19, Center-C.Tinker. K.Clay MUFFS catch, RECOVERED by JAX-D.Payne at NYG 21. D.Payne to NYG 21 for no gain (K.Clay).",
367,"(13:37) S.Koch punts 55 yards to CIN 20, Center-M.Cox. D.Phillips MUFFS catch, and recovers at CIN 15. D.Phillips to CIN 31 for 16 yards (J.Allen).",16.0
437,"(9:08) D.Kaser punts 58 yards to BUF 8, Center-M.Windt. M.Murphy MUFFS catch, recovered by BUF-T.Jones at BUF 0. Touchback (U.Nwosu). BUF-T.Jones was injured during the play. His return is Questionable. Touchback due to penalty in end zone. T.Jones assisted to tunnel. PENALTY on LAC-U.Nwosu, Unnecessary Roughness, 15 yards, enforced at BUF 20.",
438,"(4:04) D.Kaser punts 45 yards to BUF 49, Center-M.Windt. M.Murphy MUFFS catch, and recovers at LAC 48. M.Murphy to LAC 42 for 6 yards (A.Phillips; N.Dzubnar).",6.0
803,"(2:21) B.Colquitt punts 3 yards to CLE 34, Center-C.Hughlett. T.Brooks MUFFS catch, ball out of bounds at CLE 28. 56 Pierre-Louis deflected punt",
919,"(2:14) T.Daniel punts 57 yards to NYG 15, Center-J.Weeks. S.Coley MUFFS catch, and recovers at NYG 16.",0.0
1012,"(12:33) (Punt formation) M.Haack punts 42 yards to OAK 19, Center-J.Denney. J.Nelson MUFFS catch, touched at OAK 19, and recovers at OAK 17. J.Nelson to OAK 17 for no gain (W.Aikens).",0.0


In [8]:
# Players that have caught punts
punt_plays_df = plays_df[plays_df['specialTeamsPlayType'] == 'Punt'].dropna(subset=['returnerId'])
punt_plays_df['primaryReturnerId'] = punt_plays_df['returnerId'].str.split(';').str[0].astype('int64')
punt_returns_df = pd.merge(punt_plays_df, players_df, left_on='primaryReturnerId', right_on='nflId')
punt_returns_columns = [
    'gameId',
    'playDescription',
    'specialTeamsResult',
    'returnerId',
    'nflId',
    'displayName'
]
punt_returns_df.head()[punt_returns_columns]

Unnamed: 0,gameId,playDescription,specialTeamsResult,returnerId,nflId,displayName
0,2018090600,"(9:20) C.Johnston punts 56 yards to ATL 36, Center-R.Lovato. J.Hardy to ATL 41 for 5 yards (K.Grugier-Hill). PENALTY on PHI-S.Gibson, Unsportsmanlike Conduct, 15 yards, enforced at ATL 41.",Return,42450,42450,Justin Hardy
1,2018090600,"(12:33) C.Johnston punts 38 yards to ATL 20, Center-R.Lovato, fair catch by J.Hardy.",Fair Catch,42450,42450,Justin Hardy
2,2018091600,"(12:57) M.Palardy punts 48 yards to ATL 42, Center-J.Jansen, fair catch by J.Hardy.",Fair Catch,42450,42450,Justin Hardy
3,2018091600,"(5:43) M.Palardy punts 37 yards to ATL 10, Center-J.Jansen, fair catch by J.Hardy.",Fair Catch,42450,42450,Justin Hardy
4,2018092300,"(9:01) T.Morstead punts 57 yards to ATL 13, Center-Z.Wood. J.Hardy pushed ob at ATL 19 for 6 yards (T.Hill; J.Hardee).",Return,42450,42450,Justin Hardy


In [9]:
# Distribution of punts caught by player
punt_returners_grouped_df = punt_returns_df.groupby(['primaryReturnerId', 'displayName']).size() \
    .reset_index(name='puntsCaught')
punt_returners_grouped_df.sort_values('puntsCaught', ascending=False).head(20)

Unnamed: 0,primaryReturnerId,displayName,puntsCaught
12,35527,Andre Roberts,133
71,43663,Alex Erickson,119
85,44932,Tarik Cohen,100
50,42794,DeAndre Carter,93
131,46279,Braxton Berrios,83
79,44837,Jabrill Peppers,83
41,42051,Diontae Spencer,80
113,46116,Christian Kirk,79
59,43406,Pharoh Cooper,77
68,43556,Chester Rogers,74


In [10]:
# Group by return result (positive, neutral, negative)
conditions = [
    (punt_returns_df['kickReturnYardage'] > 0),
    (punt_returns_df['kickReturnYardage'] == 0) | (punt_returns_df['kickReturnYardage'].isna()),
    (punt_returns_df['kickReturnYardage'] < 0),
]
result_types = ['positive', 'neutral', 'negative']
punt_returns_df['returnResult'] = np.select(conditions, result_types)
punt_returns_df.groupby(['primaryReturnerId', 'displayName', 'returnResult']).size().head()

primaryReturnerId  displayName     returnResult
29830              Adam Jones      negative         1
                                   neutral          2
                                   positive         5
29957              Darren Sproles  neutral         16
                                   positive        20
dtype: int64

In [11]:
# Add isTouchdown column
punt_returns_df['isTouchdown'] = punt_returns_df['playDescription'].str.contains('TOUCHDOWN')
punt_returns_df[['playDescription', 'isTouchdown']].head()

Unnamed: 0,playDescription,isTouchdown
0,"(9:20) C.Johnston punts 56 yards to ATL 36, Center-R.Lovato. J.Hardy to ATL 41 for 5 yards (K.Grugier-Hill). PENALTY on PHI-S.Gibson, Unsportsmanlike Conduct, 15 yards, enforced at ATL 41.",False
1,"(12:33) C.Johnston punts 38 yards to ATL 20, Center-R.Lovato, fair catch by J.Hardy.",False
2,"(12:57) M.Palardy punts 48 yards to ATL 42, Center-J.Jansen, fair catch by J.Hardy.",False
3,"(5:43) M.Palardy punts 37 yards to ATL 10, Center-J.Jansen, fair catch by J.Hardy.",False
4,"(9:01) T.Morstead punts 57 yards to ATL 13, Center-Z.Wood. J.Hardy pushed ob at ATL 19 for 6 yards (T.Hill; J.Hardee).",False


In [12]:
# Load 2018 tracking data (slow, only run when needed)
tracking_2018_df = pd.read_csv('../input/nfl-big-data-bowl-2022/tracking2018.csv')

In [13]:
tracking_2018_df.head()

Unnamed: 0,time,x,y,s,a,dis,o,dir,event,nflId,displayName,jerseyNumber,position,team,frameId,gameId,playId,playDirection
0,2018-12-30T21:25:32.200,41.32,29.45,4.36,1.33,0.43,130.42,128.44,,39470.0,Justin Tucker,9.0,K,home,1,2018123000,36,right
1,2018-12-30T21:25:32.300,41.68,29.17,4.59,1.24,0.45,128.59,127.81,,39470.0,Justin Tucker,9.0,K,home,2,2018123000,36,right
2,2018-12-30T21:25:32.400,42.05,28.88,4.74,0.99,0.47,124.47,128.15,,39470.0,Justin Tucker,9.0,K,home,3,2018123000,36,right
3,2018-12-30T21:25:32.500,42.43,28.59,4.87,0.71,0.48,126.02,127.35,,39470.0,Justin Tucker,9.0,K,home,4,2018123000,36,right
4,2018-12-30T21:25:32.600,42.84,28.31,4.96,0.79,0.5,131.71,124.75,,39470.0,Justin Tucker,9.0,K,home,5,2018123000,36,right


In [14]:
tracking_2018_df[(tracking_2018_df['nflId'] == 21213) & (tracking_2018_df['gameId'] == 2018123011) & (tracking_2018_df['event'] != 'None')]

Unnamed: 0,time,x,y,s,a,dis,o,dir,event,nflId,displayName,jerseyNumber,position,team,frameId,gameId,playId,playDirection
548583,2018-12-31T01:35:27.200,84.76,28.7,0.05,0.05,0.01,113.88,115.14,ball_snap,21213.0,Adam Vinatieri,4.0,K,away,11,2018123011,482,right
548598,2018-12-31T01:35:28.700,87.45,27.57,1.89,2.74,0.19,120.84,95.09,extra_point_attempt,21213.0,Adam Vinatieri,4.0,K,away,26,2018123011,482,right
548614,2018-12-31T01:35:30.300,87.72,28.0,0.47,0.95,0.04,100.81,226.07,extra_point,21213.0,Adam Vinatieri,4.0,K,away,42,2018123011,482,right
554724,2018-12-31T02:01:06.300,35.65,24.54,0.05,0.05,0.02,289.28,283.5,ball_snap,21213.0,Adam Vinatieri,4.0,K,away,11,2018123011,1109,left
554738,2018-12-31T02:01:07.700,33.2,25.7,2.64,2.4,0.25,317.08,283.58,extra_point_attempt,21213.0,Adam Vinatieri,4.0,K,away,25,2018123011,1109,left
554754,2018-12-31T02:01:09.300,32.08,25.73,0.54,0.72,0.05,279.72,7.59,extra_point,21213.0,Adam Vinatieri,4.0,K,away,41,2018123011,1109,left
573860,2018-12-31T03:10:19.700,35.73,24.58,0.2,0.15,0.02,283.78,283.61,ball_snap,21213.0,Adam Vinatieri,4.0,K,away,11,2018123011,2474,left
573875,2018-12-31T03:10:21.200,32.89,25.77,1.91,3.01,0.21,276.89,275.41,extra_point_attempt,21213.0,Adam Vinatieri,4.0,K,away,26,2018123011,2474,left
573889,2018-12-31T03:10:22.600,32.22,25.35,0.56,1.71,0.05,318.12,36.91,extra_point,21213.0,Adam Vinatieri,4.0,K,away,40,2018123011,2474,left
591547,2018-12-31T04:03:42.000,92.92,31.92,0.23,0.41,0.02,142.11,90.5,ball_snap,21213.0,Adam Vinatieri,4.0,K,away,11,2018123011,3828,right


In [15]:
field_goal_plays_df = plays_df[plays_df[PLAY_TYPE] == FIELD_GOAL]
# We don't want this filter b/c it is position on roster (not role in play)
# kicker_tracking_2018_df = tracking_2018_df[tracking_2018_df[PLAYER_POSITION] == KICKER]
field_goal_kicker_tracking_2018_df = pd.merge(
    field_goal_plays_df,
    tracking_2018_df,
    left_on=['gameId', 'playId', 'kickerId'],
    right_on=['gameId', 'playId', 'nflId']
).sort_values(['gameId', 'playId', 'kickerId', 'time'])
field_goal_kicker_tracking_2018_df.head()

NameError: name 'PLAY_TYPE' is not defined

In [None]:
# Add average speed and prior impulse (rolling average of acceleration)
field_goal_kicker_tracking_2018_df['rollingAverageSpeed'] = field_goal_kicker_tracking_2018_df['s'].rolling(window=3).mean().shift(1)
field_goal_kicker_tracking_2018_df['priorImpulse'] = field_goal_kicker_tracking_2018_df['a'].rolling(window=3).mean().shift(1)
field_goal_kicker_tracking_2018_df['isKickGood'] = np.where(field_goal_kicker_tracking_2018_df['specialTeamsResult'] == 'Kick Attempt Good', 1, 0)
columns = ['time', 'playDescription', 'displayName', 'position', 'specialTeamsResult', 'kickLength', 's', 'a', 'priorImpulse', 'rollingAverageSpeed', 'isKickGood']
field_goal_kicker_tracking_2018_df[columns].head(10)

In [None]:
field_goal_kicker_tracking_2018_df[
    (field_goal_kicker_tracking_2018_df[PLAY_EVENT] == FG_ATTEMPT) & (field_goal_kicker_tracking_2018_df['isKickGood'] == 1)
].corr()[['rollingAverageSpeed', 'priorImpulse']]

In [None]:
columns = ['playDescription', 'specialTeamsResult', 'kickLength', 'x', 'y', 's', 'a', 'o', 'dir', 'priorImpulse']
field_goal_kicker_tracking_2018_df[field_goal_kicker_tracking_2018_df[PLAY_EVENT] == FG_ATTEMPT][columns].head(20)

In [None]:
# x-axis = kick length
# y-axis = ??? acceleration/speed/prior impulse/...
# dot color = good/no good
# 27091 - Matt Bryan
# 39470 - Justin Tucker

field_goal_kick_event_tracking_2018_df = field_goal_kicker_tracking_2018_df[(field_goal_kicker_tracking_2018_df[PLAY_EVENT] == FG_ATTEMPT)]

# field_goal_kick_event_tracking_2018_df = field_goal_kicker_tracking_2018_df[
#     (field_goal_kicker_tracking_2018_df[PLAY_EVENT] == FG_ATTEMPT) & (field_goal_kicker_tracking_2018_df['kickerId'] == 27091)
# ]

# field_goal_kick_event_tracking_2018_df = field_goal_kicker_tracking_2018_df[
#     (field_goal_kicker_tracking_2018_df[PLAY_EVENT] == FG_ATTEMPT) & (field_goal_kicker_tracking_2018_df['specialTeamsResult'] == KICK_NO_GOOD)
# ]

# field_goal_kick_event_tracking_2018_df = field_goal_kicker_tracking_2018_df[
#     (field_goal_kicker_tracking_2018_df[PLAY_EVENT] == FG_ATTEMPT) & (field_goal_kicker_tracking_2018_df['kickLength'] >= 38)
# ]

x = field_goal_kick_event_tracking_2018_df['kickLength']
y = field_goal_kick_event_tracking_2018_df['priorImpulse']
# Blocks don't have kick length.. should use yard line instead
c = { KICK_GOOD: 'green', KICK_NO_GOOD: 'red', BLOCKED: 'yellow' }
# _, kick_scatter_plot = plt.subplots()
plt.figure(figsize=(12, 9))
plt.scatter(x, y, c = field_goal_kick_event_tracking_2018_df['specialTeamsResult'].map(c))
# kick_scatter_plot.legend()
plt.show()
x.corr(y)
# field_goal_kick_event_tracking_2018_df.describe()

In [None]:
# Count of kicks by result type... should just use plays_df instead for this
field_goal_kicker_tracking_2018_df[(field_goal_kicker_tracking_2018_df[PLAY_EVENT] == FG_ATTEMPT)].groupby(['specialTeamsResult']).size()

In [None]:
# Compare mean and variance of rolling average speed and prior impulse for each kicker
aggs = { 'rollingAverageSpeed': ['mean', 'var'], 'priorImpulse': ['mean', 'var'], 'isKickGood': ['sum', 'count'] }
kicker_stat_means_2018_df = field_goal_kick_event_tracking_2018_df.groupby(['kickerId', 'displayName']).agg(aggs)
kicker_stat_means_2018_df.columns = [
    'meanRollingAverageSpeed',
    'varianceRollingAverageSpeed',
    'meanPriorImpulse',
    'variancePriorImpulse',
    'madeKicks',
    'totalKickAttempts',
]
kicker_stat_means_2018_df.reset_index()
kicker_stat_means_2018_df['fieldGoalPercentage'] = kicker_stat_means_2018_df['madeKicks'] / kicker_stat_means_2018_df['totalKickAttempts']
kicker_stat_means_2018_df = kicker_stat_means_2018_df[kicker_stat_means_2018_df['totalKickAttempts'] >= 10]
kicker_stat_means_2018_df.head()

In [None]:
# meanRollingAverageSpeed vs meanPriorImpulse
x = kicker_stat_means_2018_df['meanRollingAverageSpeed']
y = kicker_stat_means_2018_df['meanPriorImpulse']
plt.figure(figsize=(12, 9))
plt.scatter(x, y)
plt.show()
x.corr(y)

In [None]:
# Compare variance of mean prior impulse vs. accuracy for each kicker
# x - accuracy
# y - prior impulse
# c - kicker
x = kicker_stat_means_2018_df['fieldGoalPercentage']
y = kicker_stat_means_2018_df['meanRollingAverageSpeed']
plt.figure(figsize=(12, 9))
plt.scatter(x, y)
plt.show()
x.corr(y)

In [None]:
# x - kick distance
# y - rolling impulse
# c - made or missed
kicker_stat_means_2018_df[kicker_stat_means_2018_df['variancePriorImpulse'] > 1]

In [None]:
# Punt Analysis - Proximity of Nearest Defender to Punter at time of Kick
punt_events_2018_df = tracking_2018_df[tracking_2018_df[Column.PLAY_EVENT.value] == PlayEvent.PUNT.value]
punt_events_2018_df = pd.merge(
    punt_events_2018_df[
        (punt_events_2018_df[Column.POSITION.value] == Position.PUNTER.value)
    ],
    punt_events_2018_df,
    left_on=[Column.GAME_ID.value, Column.PLAY_ID.value],
    right_on=[Column.GAME_ID.value, Column.PLAY_ID.value],
)
punt_events_2018_df = punt_events_2018_df[
    (punt_events_2018_df['team_x'] != punt_events_2018_df['team_y']) &
    (punt_events_2018_df['team_y'] != 'football')
]
punt_events_2018_df['playerDistance'] = (
    (punt_events_2018_df['x_y'] - punt_events_2018_df['x_x']) ** 2 +
    (punt_events_2018_df['y_y'] - punt_events_2018_df['y_x']) ** 2
) ** 0.5
punt_events_2018_df = punt_events_2018_df.sort_values([Column.GAME_ID.value, Column.PLAY_ID.value, 'playerDistance'])
punt_events_2018_df['proximityRank'] = punt_events_2018_df.groupby([Column.GAME_ID.value, Column.PLAY_ID.value])['playerDistance'].rank()
punt_events_2018_df = pd.merge(
    punt_events_2018_df,
    plays_df,
    left_on=['gameId', 'playId'],
    right_on=['gameId', 'playId']
)
# punt_events_2018_df[['gameId', 'playId', 'x_x', 'y_x', 'x_y', 'y_y', 'playerDistance', 'position_x', 'position_y', 'displayName_x', 'displayName_y', 'proximityRank']].head(30)
# punt_events_2018_df[(punt_events_2018_df['gameId'] == 2018090600) & (punt_events_2018_df['playId'] == 973)]
columns = [
    'gameId',
    'playId',
    'x_x',
    'y_x',
    'x_y',
    'y_y',
    'playerDistance',
    'position_x',
    'position_y',
    'displayName_x',
    'displayName_y',
    'proximityRank',
    'specialTeamsResult',
]
punt_events_2018_df = punt_events_2018_df[punt_events_2018_df['proximityRank'] == 1.0]
punt_events_2018_df.head()
# punt_events_2018_df[punt_events_2018_df['proximityRank'] == 1.0][columns].sort_values(['playerDistance'])
# punt_events_2018_df[
#     (punt_events_2018_df['proximityRank'] == 1.0) & (punt_events_2018_df['specialTeamsResult'] == 'Blocked Punt')
# ][columns].sort_values(['playerDistance'])

In [None]:
punt_events_2018_df.columns

In [None]:
x = punt_events_2018_df['playerDistance']
y = punt_events_2018_df['kickReturnYardage']
plt.figure(figsize=(12, 9))
plt.scatter(x, y)
plt.show()
x.corr(y)

In [None]:
aggs = { 'playerDistance': ['mean'], 'kickReturnYardage': ['mean'], 'nflId_x': ['count'] }
punters_player_distance_2018_df = punt_events_2018_df.groupby(['nflId_x', 'displayName_x']).agg(aggs).reset_index()
print(punters_player_distance_2018_df.sort_values(('playerDistance', 'mean')).head(20))
punters_player_distance_2018_df[('playerDistance', 'mean')].corr(punters_player_distance_2018_df[('kickReturnYardage', 'mean')])

In [None]:
# tracking_2018_df['event'].unique()
tracking_2018_df.head()

In [None]:
# Jammer Analysis
## Stop gunner from preventing a return (i.e. gunner getting to the returner before ball) -> Good return
## Block other players on a "promising" return -> Good return
## Data points:
#  - Tracking of gunner
#  - Tracking of jammer
#  - Was the jammer's gunner successful in a tackle, fumble recovery, etc...
#  - Play result (yardage "gained" by kicking team)
#  - Special teams result (touchback, return, fair catch, etc...)
#  - Is Touchdown (whether play resulted in touchdown)

In [None]:
# Gunner Analysis
## Prevent a good punt return
## Create good field position for kicking team by:
##  - Forcing a fair catch
##  - Downing the punt
##  - Tackling the returner
##  - Recovering muffs
##  - Recovering fumbles
## Data points:
##  - Proximity of gunner to returner at time of fair catch, muff or return
## Use pff scouting to determine who are gunner(s) on given play


In [None]:
# Field Goal Analysis/Model - Clustering of Kickers by <Metric>


[](http://)