In [38]:
#imports
import sqlite3
import pandas as pd
import numpy as np
from statsmodels.formula.api import ols, logit

In [39]:
# load data for distance model
query = """
select Plays.*, S.*, P.name, G.weather
from Plays join Stats S on Plays.gameId = S.gameId and Plays.playId = S.playId join Players P on Plays.kickerId = P.id join Games G on Plays.gameId = G.id
where Plays.specialTeamsPlayType = 'Punt' and kickerId != 'NA' and
snapTime != 'NA' and operationTime != 'NA' and hangTime != 'NA' and
kickLength != 'NA';
"""

conn = sqlite3.connect('NFL_ST_data')
cur = conn.cursor()
cur.execute(query)
rows = cur.fetchall()
out = []
for row in rows:
    out.append(row)
data = pd.DataFrame(out)
data = data.rename(columns={x:y for x,y in enumerate(['gameId','playId','Description','qtr','down','yardsToGo','posTeam','playType','STResult','kickerId','kickBlockerId','yardlineSide',
                     'yardline','gameClock', 'penaltyYards','HomeScore','VisitorScore','passResult','kickLength','kickReturnYardage',
                     'playResult', 'absoluteYardlineNumber','s.gameID', 's.playId','snapDetail','snapTime','operationTime', 'hangTime', 'kickType', 'kickDirectionIntended',
                     'kickDirectionActual','returnDirectionIntended', 'returnDirectionActual','tackler','kickoffReturnFormation','kickContactType', 'punter', 'weather'])})

data['endzoneDistance'] = np.where(data['posTeam'] == data['yardlineSide'], data['yardline']+50, data['yardline'])
data['kickShank'] = np.where(data['kickDirectionIntended'] != data['kickDirectionActual'], 1, 0)
X = data[['endzoneDistance', 'snapDetail','snapTime','operationTime','hangTime','kickType', 'kickShank', 'kickDirectionActual']]
Y = data['kickLength']

In [40]:
#Get the weather data
bad_weather = {'Rain', 'Light Rain', 'Drizzle', 'Light Snow', 'Light Sleet', 'Light Rain and Windy', 'Snow'}

for i in range(len(data)):
    if data.loc[i,'weather'] in bad_weather:
        data.loc[i, 'weather_simp'] = 'elements'
    else:
        data.loc[i, 'weather_simp'] = 'clear'
#data['weather_simp'] = np.where(np.in1d(data['weather'],bad_weather), 'elements', 'clear')
data['weather_simp'] = np.where(data['weather'] == 'dome', 'dome', data['weather_simp'])

In [41]:
#convert to numeric
for col in ['kickLength', 'snapTime', 'operationTime', 'hangTime']:
    data[col] = pd.to_numeric(data[col])

In [42]:
# Make the model to get the punt distance
# Omits all information about punter except which it is, treating each punter as a fixed effects
# Possible continuation would be attempting mixed effects models
punt_model = ols('kickLength ~ bs(endzoneDistance, knots = (55,90), degree= 3, include_intercept = False) + C(punter) + C(snapDetail) + snapTime + operationTime + C(kickType) + kickShank + C(kickDirectionActual) + C(weather)- 1', data=data)
punt_fit = punt_model.fit()
print('Punt length:')
print(punt_fit.summary())

Punt length:
                            OLS Regression Results                            
Dep. Variable:             kickLength   R-squared:                       0.293
Model:                            OLS   Adj. R-squared:                  0.282
Method:                 Least Squares   F-statistic:                     26.67
Date:                Wed, 05 Jun 2024   Prob (F-statistic):               0.00
Time:                        12:09:30   Log-Likelihood:                -20162.
No. Observations:                5871   AIC:                         4.051e+04
Df Residuals:                    5780   BIC:                         4.111e+04
Df Model:                          90                                         
Covariance Type:            nonrobust                                         
                                                                                coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------

In [43]:
#Look at coefficients for punters and also the standardized coefficient
punter_coef = punt_fit.params.reset_index()
punter_coef = punter_coef.iloc[:(len(data['punter'].unique())-1),:]
punter_coef['index'] = punter_coef['index'].str.split('[').str[1].str[:-1]
punter_coef = punter_coef.rename(columns = {0:'coef'})
punt_avg, punt_sd = (punter_coef['coef'].mean(), punter_coef['coef'].std())
punter_coef['Adj. coef'] = (punter_coef['coef']-punt_avg)/punt_sd
print(punter_coef)

                  index       coef  Adj. coef
0             A.J. Cole  33.411136   0.810067
1         Aldrick Rosas  23.728856  -2.937682
2              Andy Lee  33.232161   0.740791
3        Austin Seibert  34.725108   1.318670
4           Braden Mann  29.825543  -0.577819
5        Bradley Pinion  29.787280  -0.592630
6            Brett Kern  33.729769   0.933402
7      Britton Colquitt  30.954980  -0.140645
8           Bryan Anger  32.050404   0.283364
9      Cameron Johnston  33.609123   0.886703
10          Chris Jones  28.566251  -1.065257
11         Colby Wadman  32.473693   0.447208
12       Colton Schmidt  28.077362  -1.254493
13      Corey Bojorquez  31.793158   0.183792
14         Donnie Jones  28.636292  -1.038146
15           Drew Kaser  33.321255   0.775277
16      Dustin Colquitt  30.577247  -0.286855
17     Hunter Niswander  31.769073   0.174469
18           J.K. Scott  30.947481  -0.143547
19             Jack Fox  34.421877   1.201298
20          Jake Bailey  34.138830

In [44]:
# do cleaning to prepare for modeling the reutrn
data['penaltyYards'] = np.where(data['penaltyYards'] == 'NA', 0, data['penaltyYards'])
data['returnLength'] = data['playResult'] - data['kickLength'] - data['penaltyYards']
data['returnLength'] = pd.to_numeric(np.where(data['STResult'] == 'Return', data['returnLength'], 0))

returns = data[(data['STResult'] == "Return")]
returns = returns[returns['kickReturnYardage'] != "NA"]

returns['kickReturnYardage'] = pd.to_numeric(returns['kickReturnYardage'])

In [45]:
# fit a linear model for returns
return_model = ols('kickReturnYardage ~ kickLength + operationTime + hangTime + C(punter) + C(kickType) + C(kickDirectionActual) + kickShank + C(weather_simp) -1', data=returns)
return_fit = return_model.fit()
print("\n\n\n")
print('Return Length: ')
print(return_fit.summary())





Return Length: 
                            OLS Regression Results                            
Dep. Variable:      kickReturnYardage   R-squared:                       0.082
Model:                            OLS   Adj. R-squared:                  0.057
Method:                 Least Squares   F-statistic:                     3.245
Date:                Wed, 05 Jun 2024   Prob (F-statistic):           1.75e-15
Time:                        12:09:44   Log-Likelihood:                -8511.6
No. Observations:                2277   AIC:                         1.715e+04
Df Residuals:                    2215   BIC:                         1.750e+04
Df Model:                          61                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------

In [46]:
#Look at how different factors impact if a kick is caught
caught = data[data['STResult'].isin(['Return', 'Fair Catch', 'Muffed'])]
caught['returnAttempted'] = pd.to_numeric(np.where(caught['STResult'] == 'Return', 1, 0))
return_prob = logit("returnAttempted ~ kickLength + operationTime + hangTime + C(kickType) + C(kickDirectionActual) + kickShank+ C(weather_simp)", data = caught)
prob_fit = return_prob.fit()
print("\n\n\n")
print("Return Chance: ")
print(prob_fit.summary())

Optimization terminated successfully.
         Current function value: 0.497246
         Iterations 6




Return Chance: 
                           Logit Regression Results                           
Dep. Variable:        returnAttempted   No. Observations:                 4077
Model:                          Logit   Df Residuals:                     4065
Method:                           MLE   Df Model:                           11
Date:                Wed, 05 Jun 2024   Pseudo R-squ.:                  0.2750
Time:                        12:09:49   Log-Likelihood:                -2027.3
converged:                       True   LL-Null:                       -2796.1
Covariance Type:            nonrobust   LLR p-value:                     0.000
                                   coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Intercept                        3.5706      0.928  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [47]:
# pull the tracking data for use in the models
tracking_query = """
select *
from Tracking t join Plays P on P.playId = t.playId and P.gameId = t.gameId join Games g on g.Id = t.gameId
where p.specialTeamsPlayType = 'Punt' and event = 'punt_received'
"""
cur.execute(tracking_query)
rows = cur.fetchall()
out = []
for row in rows:
    out.append(row)
column_names = [description[0] for description in cur.description]
tracking = pd.DataFrame(out, columns=column_names)

#drop unneeded columns
columns_to_drop = [15, 16]
tracking = tracking.iloc[:, [i for i in range(len(tracking.columns)) if i not in columns_to_drop]]

In [48]:
# remove blocked kicks
tracking = tracking[tracking['kickBlockerId'] == 'NA']

In [49]:
#get list of all punts
uniques = list(set(zip(tracking['gameId'], tracking['playId'])))
# remove a misclassified blocked punt
uniques.pop(1261)

(2018100711, 253)

In [50]:
# fix NA in direction
tracking['dir'] = np.where(tracking['dir'] != 'NA',  tracking['dir'], tracking['o'])
tracking['dir'] = np.where(tracking['dir'] != 'NA',  tracking['dir'], 0)
tracking['dir'] = tracking['dir'].fillna(0).astype(float)

In [51]:
#get list of all returners
all_returners = ['Bryce Callahan', 'Tarik Cohen', 'Marvin Hall', 'Marquez Callaway', 'T.J. Jones', 'Chris Claybrooks', 'Brandon Powell', 'Randall Cobb', 'Keke Coutee', 'Hunter Renfrow', 'Rashad Greene', 'Janarion Grant', 'Alex Erickson', 'Tyler Ervin', 'Tyler Boyd', 'Patrick Peterson', 'Julian Edelman', 'Tyler Lockett', 'Chris Hogan', 'Corey Coleman', 'Darrius Shepherd', 'DeAndre Carter', 'Diontae Johnson', 'Jalen Richard', 'Cameron Batson', 'Jarvis Landry', 'Darius Phillips', 'Dontrell Hilliard', 'Tavon Austin', 'Jamison Crowder', 'DeSean Jackson', 'Kaelin Clay', 'Kenny Moore', 'Anthony Miller', 'K.J. Osborn', 'Matthew Slater', 'Golden Tate', 'Dede Westbrook', 'Jalen Reagor', 'Isaiah Rodgers', 'K.J. Hamler', 'Cyrus Jones', 'Chad Beebe', 'Donovan Peoples-Jones', 'Brandon Zylstra', 'C.J. Board', 'Quadree Henderson', 'Jabrill Peppers', 'Jonathan Jones', 'Charvarius Ward', 'River Cracraft', 'Jeremy Kerley', 'James Proche', 'Andre Roberts', 'T.Y. Hilton', 'Tim White', 'Damiere Byrd', 'Jamal Agnew', 'Mohamed Sanu', 'Austin Carr', 'Quandre Diggs', 'Preston Williams', 'Marcus Murphy', 'Jakeem Grant', 'Michael Walker', 'Troymaine Pope', 'Dwayne Harris', 'Tyreek Hill', 'J.J. Taylor', 'Stacy Coley', 'D.J. Reed', 'Deionte Thompson', 'Ted Ginn', 'Greg Ward', 'Brandon Aiyuk', 'Tremon Smith', 'D.J. Moore', 'Spencer Schnell', 'Christian Kirk', 'Demarcus Robinson', 'Mecole Hardman', 'Antonio Hamilton', 'Dante Pettis', 'Odell Beckham', 'Marcus Sherels', 'Antonio Callaway', 'Ced Wilson', 'Jaydon Mickens', 'Greg Dortch', 'Jamie Collins', 'Diontae Spencer', 'Antonio Brown', 'Keelan Cole', 'Taysom Hill', 'Bisi Johnson', 'Kenjon Barner', 'Ryan Switzer', 'Danny Amendola', 'Russell Gage', 'Justin Hardy', 'DaeSean Hamilton', 'Tyrann Mathieu', 'Gunner Olszewski', 'Nyheim Hines', 'Rashard Davis', 'football', 'Brandon Tate', 'Nelson Agholor', 'Janoris Jenkins', 'Trent Taylor', 'Bobo Wilson', 'Javien Elliott', 'LeShaun Sims', 'Boston Scott', 'Maurice Harris', 'Cole Beasley', 'Richie James', 'Trevor Davis', 'Rishard Matthews', 'Alvin Kamara', 'Steven Sims', 'Chester Rogers', 'Darren Sproles', 'Isaiah McKenzie', "Adoree' Jackson", 'Charone Peake', 'Justin Watson', 'Kalif Raymond', "Da'Mari Scott", 'Isaiah Wright', 'Nsimba Webster', 'Josh Jackson', 'Myles Hartsfield', 'Nick Williams', 'Torry McTyer', 'J.J. Jones', 'Mike Hughes', 'Jaire Alexander', 'Phillip Lindsay', 'Andy Isabella', 'Marqui Christian', 'Tramon Williams', 'Byron Murphy', 'Greg Stroman', 'Quenton Meeks', 'Riley McCarron', 'Corey Clement', 'Travis Benjamin', 'Cedrick Wilson', 'Tommylee Lewis', 'Deonte Harris', 'Devin Duvernay', 'Cooper Kupp', 'Will Fuller', 'Desmond King', 'Pharoh Cooper', 'Jawill Davis', 'CeeDee Lamb', 'Kenny Stills', 'Jordy Nelson', 'Adam Humphries', 'Braxton Berrios', 'Brandon Rusnak', 'Dominique Rodgers-Cromartie', 'Mike Thomas', 'Micah Hyde', 'Trey Quinn', 'K.J. Hill', "De'Anthony Thomas", 'Danny Johnson', 'David Moore', 'JoJo Natson', 'T.J. Logan', 'Adam Jones', 'Avonte Maddox', "D'Ernest Johnson", 'Ray-Ray McCloud']

In [52]:
# get arrays of all punters and returners
all_punters = tracking[tracking['position'] == 'P']['name'].unique()
all_returners = np.array(all_returners)

In [53]:
#prepare data for model

#make empty data frame
ready_data = pd.DataFrame(columns = [x for x in range(0,127+len(all_returners)+3+len(all_punters))])
#iterate through all punts
for i in range(len(uniques)):
    #get the current punt
    play_one = tracking[(tracking['gameId'] == uniques[i][0]) & (tracking['playId'] == uniques[i][1])].reset_index(drop=True)
    # adjust all positions to be realtive to the football
    football = play_one[play_one['name'] == 'football']
    football_loc = (football['x'], football['y'])
    for j in range(len(play_one)):
        play_one.iloc[j,1] -= football_loc[0]
        play_one.iloc[j,2] -= football_loc[1]
    
    #get distance between each player and the football
    # dist only needed for sorting so we can skip square root since it is also always increasing
    play_one.loc[:,'dist'] = play_one['x']**2 + play_one['y']**2

    #switch teams to from home and away to recv and kick
    if play_one.loc[0, 'possessionTeam'] == play_one.loc[0, 'home']:
        play_one['team'] = np.where(play_one['team'] == 'home', 'kick', 'recv')
    else:
        play_one['team'] = np.where(play_one['team'] == 'away', 'kick', 'recv')
        
    #calculate speed at ballcarrier using a dot product
    mag1 = np.sqrt(np.cos(np.radians((play_one['dir']+90)%360))**2 + np.sin(np.radians((play_one['dir']+90)%360))**2)
    mag2 = np.sqrt(play_one['x']**2 + play_one['y']**2)
    play_one['dot'] = play_one['s'] * (-np.cos(np.radians((play_one['dir']+90)%360)) * -play_one['x'] + np.sin(np.radians((play_one['dir']+90)%360)) * -play_one['y'])/(mag1*mag2)
        
    #sort by team and distance so it is also in a consistient order for the model
    sorted = play_one.sort_values(by=['team', 'dist']).reset_index(drop=True)
    #drop football and returner (11 is ball, 12 is returner
    returner = sorted.loc[12,'name']
    sorted = sorted.drop([11,12])
    all_values = []

    
    
    # extend dataframe into one long row so each play is one row 
    for index, row in sorted.iterrows():
        all_values.extend([row['x'], row['y'], row['s'], row['a'], row['dir'], row['dot']])
    
    aggregated_row = pd.Series(all_values).to_frame().T
    
    #add returner as a column
    returner_ind = np.where(all_returners == returner)
    returner_cols = pd.DataFrame(np.zeros((1, len(all_returners))))
    returner_cols.iloc[0,returner_ind] = 1
    returner_cols.columns = range(126, 126+len(all_returners))
    aggregated_row = pd.concat([aggregated_row, returner_cols],axis=1)
    
    #Tadd initial yardline to model (subratcing 10 to get rid of endzone)
    aggregated_row[len(aggregated_row.iloc[0,:])] = play_one.loc[0,'absoluteYardlineNumber'] - 10
    
    #add punter as a column
    try:
        punter = sorted[sorted['position'] == 'P'].iloc[0,10]
    except IndexError:
        punter = sorted[sorted['position'] == 'K'].iloc[0,10]
    punter_ind = np.where(all_punters == punter)
    punter_cols = pd.DataFrame(np.zeros((1, len(all_punters))))
    punter_cols.iloc[0,punter_ind] = 1
    punter_cols.columns = range(len(aggregated_row.iloc[0,:]), len(aggregated_row.iloc[0,:])+len(all_punters))
    aggregated_row = pd.concat([aggregated_row, punter_cols],axis=1)
    
    
    #add hangtime as a column
    try:
        aggregated_row[len(aggregated_row.iloc[0,:])] = data[(data['gameId'] == uniques[i][0]) & (data['playId'] == uniques[i][1])].iloc[0,27]
    except IndexError:
        aggregated_row[len(aggregated_row.iloc[0,:])] = np.NAN
        print(i)
    
    
    #add punt distance as a column
    aggregated_row[len(aggregated_row.iloc[0,:])] = play_one.loc[0,'kickLength']
    
    #add return yardage
    aggregated_row[len(aggregated_row.iloc[0,:])] = play_one['kickReturnYardage'][0]
    
    ready_data.loc[i] = aggregated_row.loc[0]
    
    #track progress
    if i%500 == 0:
        print(f"{round((i/len(uniques))*100,2)}% complete")


0.0% complete
21.09% complete
42.18% complete
63.26% complete
84.35% complete


In [54]:
#make sure that there is return yardage
ready_data = ready_data[ready_data[len(ready_data.iloc[0,:])-1] != 'NA']

In [55]:
# get dependant variable
Y = ready_data.iloc[:,-1].astype('float32')
# get independent variables, one collection of all, one collection excluding the punter and punt characteristics
X1 = ready_data.iloc[:,:-1]
X2 = ready_data.iloc[:,:-(3+len(all_punters))]

In [56]:
# more imports
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import KFold

In [57]:
#clear keras just in case
tf.keras.backend.clear_session()

In [58]:
#set up cross-validation to find best model structure
kf = KFold(n_splits = 8)
VALIDATION_LOSS = []

for train_index, val_index in kf.split(X1, Y):
  #set train and validation sets
  train_x = X1.iloc[train_index]
  tf.convert_to_tensor(train_x, dtype=tf.float32)
  valid_x = X1.iloc[val_index]
  tf.convert_to_tensor(valid_x, dtype=tf.float32)
  train_y = Y.iloc[train_index]
  tf.convert_to_tensor(train_y, dtype=tf.float32)
  valid_y = Y.iloc[val_index]
  tf.convert_to_tensor(valid_y, dtype=tf.float32)
  
  #set seed to make reproducable (for random starting weights)
  keras.utils.set_random_seed(14)
  #build model
  network = keras.models.Sequential([
      keras.layers.Dense(1024, activation = 'elu'),
      keras.layers.Dense(2048, activation = "elu"),
      keras.layers.Dense(4096, activation = 'elu'),
      keras.layers.Dense(4096, activation="elu"),
      keras.layers.Dense(4096, activation="elu"),
      keras.layers.Dense(2048, activation="elu"),
      keras.layers.Dense(1, activation = 'linear')
  ])
  
  network.compile(loss="mean_squared_error",
                optimizer=keras.optimizers.Adam(learning_rate=0.001),
                metrics = ["mean_squared_error", "mean_absolute_error"])
  
  
  history = network.fit(train_x, train_y,
                      epochs=12, batch_size=128,
                      validation_data=(valid_x, valid_y), verbose = 0)
  
  results = network.evaluate(valid_x, valid_y)
  
  VALIDATION_LOSS.append(results[0])
  
  tf.keras.backend.clear_session()
  

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 108.1547 - mean_absolute_error: 6.8440 - mean_squared_error: 108.8002
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 113.7605 - mean_absolute_error: 6.8550 - mean_squared_error: 112.6194
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 129.1349 - mean_absolute_error: 6.3280 - mean_squared_error: 130.8813
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - loss: 129.8516 - mean_absolute_error: 6.9669 - mean_squared_error: 132.1587
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 80.7071 - mean_absolute_error: 6.2888 - mean_squared_error: 80.7071
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 132.1837 - mean_absolute_error: 7.3027 - mean_squared_error: 132.1837
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 90.4212 - mean

In [59]:
# make a second network for the data not including the punter or punt characteristics
network2 = keras.models.Sequential([
      keras.layers.Dense(1024, activation = 'elu'),
      keras.layers.Dense(2048, activation = "elu"),
      keras.layers.Dense(4096, activation = 'elu'),
      keras.layers.Dense(4096, activation="elu"),
      keras.layers.Dense(4096, activation="elu"),
      keras.layers.Dense(2048, activation="elu"),
      keras.layers.Dense(1, activation = 'linear')
  ])
  
network2.compile(loss="mean_squared_error",
                optimizer=keras.optimizers.Adam(learning_rate=0.001),
                metrics = ["mean_squared_error", "mean_absolute_error"])
  
  

In [60]:
#Fit the two models
full_model = network.fit(X1, Y,
                      epochs=12, batch_size=128,verbose = 0)
limited_model = network2.fit(X2, Y,
                            epochs=12, batch_size=128,verbose = 0)

In [61]:
# Find average punter attributed distance for each punter
final = pd.DataFrame(columns = ['Name', 'Distance', 'Return', 'total'])
for i in range(len(all_punters)):
    #just get punts from the current punter
    mask = np.where(ready_data.iloc[:,127+len(all_returners)+i] == 1)
    #get the expected return values for all of their punts
    full = network.predict(X1.iloc[mask], verbose=0)
    limited = network2.predict(X2.iloc[mask], verbose=0)
    # take the difference to get the punters contribution to expected return
    diff = (limited-full).mean()
    try:
        #add the added distance added to the punt to the expectedreturn distance mitigated
        dist = punter_coef[punter_coef['index'] == all_punters[i]].iloc[0,1] + diff
    except IndexError:
        print(all_punters[i])
        #Catch Lachlan Edwards also going as Lac Edwards
        dist = punter_coef[punter_coef['index'] == "Lachlan Edwards"].iloc[0,1] + diff
    # collect all into one dataframe
    final.loc[len(final)] = [all_punters[i],dist-diff, diff, dist]
    print(all_punters[i], dist)

Sam Koch 32.93121873973017
Matt Haack 30.662820713741297
Matt Darr 24.114257332755454
J.K. Scott 30.238206297181137
Sam Martin 31.510412295079647
Trevor Daniel 29.7046282140639
Logan Cooke 31.24967604768733
Johnny Townsend 28.166292958304133
Pat O'Donnell 31.809978060592314
Matt Wile 30.66169885145021
Lac Edwards
Lac Edwards 31.01629223539347
Ryan Allen 31.757671514161295
Michael Palardy 30.967701622677403
Chris Jones 27.594357677363547
Kevin Huber 30.858525298642196
Jordan Berry 30.673458943979828
Bryan Anger 31.068460001134
Matt Bosher 29.883095075632056
Brett Kern 33.09439213741307
Rigoberto Sanchez 31.85588112826794
Cameron Johnston 32.55354364180484
Tress Way 32.59556286729686
Colby Wadman 31.733198563278094
Donnie Jones 27.776993640061928
Bradley Pinion 29.290315386827185
Michael Dickson 33.29220696960188
Riley Dixon 30.869540918856426
Britton Colquitt 30.366487943677168
Andy Lee 32.45832581060324
Johnny Hekker 32.933886741693094
Dustin Colquitt 29.54207256979999
Thomas Morstead 

In [62]:
#add a combined name column
final[['first', 'last']] = final['Name'].str.split(' ', expand=True)
final['short'] = final['first'].str[0] + '. ' + final['last']

Make Graphics

In [63]:
# import plotnine to graph results
from plotnine import ggplot, geom_point, geom_label, aes, labs, theme_grey, xlim, ggsave, geom_col, coord_flip, scale_x_continuous, scale_color_manual, scale_y_continuous, geom_segment,theme, element_text, geom_vline, geom_line, geom_smooth

In [64]:
# Graphic comparing distance added to punt with distance expected return is shorten
adjust_text_dict = {
    'force_text':(0.45,0.45),
    'force_static':(0.3, 0.3),
    'force_pull':(0.4,0.4),
    'pull_threshold':5,
    'expand_axes':True,
    'arrowprops': {
        'arrowstyle': '->',
        'color': 'red'
    }
}


p = ggplot(final, aes("Distance","Return",label = "short", size = 11))+geom_point()+ geom_label(size=16) + labs(x = "Calculated Average Impact on Gross Punt Distance (Yards)", y= "Calculated Average Impact on Return (Yards)", title = "Calculated Punter Impact") + theme_grey()+xlim(25,35.5) + theme(axis_title=element_text(size=24), plot_title=element_text(size=32,face="bold"), axis_text=element_text(size=24), legend_position="none")


In [65]:
#p.save(filename="Figures/Scatter.png", dpi=1000, height=8, width=14, units='in')

In [66]:
#Look at results
final

Unnamed: 0,Name,Distance,Return,total,first,last,short
0,Sam Koch,33.426973,-0.495754,32.931219,Sam,Koch,S. Koch
1,Matt Haack,31.4945,-0.831679,30.662821,Matt,Haack,M. Haack
2,Matt Darr,25.688242,-1.573985,24.114257,Matt,Darr,M. Darr
3,J.K. Scott,30.947481,-0.709275,30.238206,J.K.,Scott,J. Scott
4,Sam Martin,32.208273,-0.69786,31.510412,Sam,Martin,S. Martin
5,Trevor Daniel,30.356483,-0.651855,29.704628,Trevor,Daniel,T. Daniel
6,Logan Cooke,32.254281,-1.004605,31.249676,Logan,Cooke,L. Cooke
7,Johnny Townsend,28.991783,-0.82549,28.166293,Johnny,Townsend,J. Townsend
8,Pat O'Donnell,32.597713,-0.787735,31.809978,Pat,O'Donnell,P. O'Donnell
9,Matt Wile,31.546543,-0.884844,30.661699,Matt,Wile,M. Wile


In [67]:
#look at ranges between distance and return
print(final['Distance'].max() - final['Distance'].min())
print(final['Return'].max() - final['Return'].min())

9.207416526478223
1.7791669


In [68]:
#Get top five and bottom 5
pd.concat([final.sort_values('total').iloc[:5], final.sort_values('total').iloc[len(final)-5:]])

Unnamed: 0,Name,Distance,Return,total,first,last,short
2,Matt Darr,25.688242,-1.573985,24.114257,Matt,Darr,M. Darr
41,Kasey Redfern,26.570989,-0.88637,25.684619,Kasey,Redfern,K. Redfern
32,Colton Schmidt,28.077362,-1.485365,26.591997,Colton,Schmidt,C. Schmidt
13,Chris Jones,28.566251,-0.971893,27.594358,Chris,Jones,C. Jones
23,Donnie Jones,28.636292,-0.859298,27.776994,Donnie,Jones,D. Jones
18,Brett Kern,33.729769,-0.635377,33.094392,Brett,Kern,B. Kern
25,Michael Dickson,34.045769,-0.753562,33.292207,Michael,Dickson,M. Dickson
40,Jake Bailey,34.13883,-0.705947,33.432883,Jake,Bailey,J. Bailey
44,Jack Fox,34.421877,-0.808044,33.613833,Jack,Fox,J. Fox
42,Ryan Winslow,34.895659,-0.878948,34.016711,Ryan,Winslow,R. Winslow


In [69]:
#make graphic showing top five and bottom five punters by average punter attributed distance

p2 = ggplot(pd.concat([final.sort_values('total').iloc[:5], final.sort_values('total').iloc[len(final)-5:]]), aes("reorder(Name,total)","total")) + geom_point(size = 5)+ geom_segment(aes(x='Name',xend='Name',y=0, yend='total'), size =3)+coord_flip()+labs(y="Average Yards Added per Punt", x= "Punter", title= "Average Punter Attributed Distance", subtitle = "Top Five vs. Bottom Five") + theme(axis_title=element_text(size=28), plot_title=element_text(size=32,face="bold", hjust=0.5), plot_subtitle=element_text(size = 32, hjust=0.5), axis_text=element_text(size=24))+ geom_vline(xintercept=5.5, linetype='dashed', size =2, color = 'red')+ scale_y_continuous(breaks=[0,5,10,15,20,25,30,35])

In [70]:
#p2.save(filename="Figures/top5bottom5.png", dpi=1000, height=8, width=14, units='in')

In [71]:
# make graphic showing kick length controling for yardline of the kick and weather
p3 = ggplot(data, aes(x='endzoneDistance', y = 'kickLength', color = 'weather_simp')) + geom_point(size=0.3)+geom_smooth(method= 'loess', size = 1.5) + theme(axis_title=element_text(size=24), plot_title=element_text(size=25,face="bold", hjust=1), axis_text=element_text(size=18), legend_position=(0.01,0.99), legend_title=element_text(size=24), legend_text=element_text(size=20)) + labs(y="Punt Length (Yards)", x= "Distance to End Zone (Yards)", title= "Punt Length vs. End Zone Distance", color = "Weather")  + scale_color_manual(labels = ("Clear", "Domed", 'Elements'), values = ("red", 'green', 'blue'))

In [72]:
#p3.save(filename="Figures/LengthVsDistance.png", dpi=750, height=6, width=7.5, units='in')

In [73]:
# Make graphic showing kick length controlling for hangtime
p4 = ggplot(data[data['hangTime']>2], aes(x='hangTime', y = 'kickLength')) + geom_point(size=0.3)+geom_smooth(method= 'loess', size = 1.5, color = 'blue')+theme(axis_title=element_text(size=24), plot_title=element_text(size=25,face="bold"), axis_text=element_text(size=18)) + labs(y="Punt Length (Yards)", x= "Hang Time (Seconds)", title= "Punt Length vs. Hang Time")

In [74]:
#p4.save(filename="Figures\LengthVsHangTime.png", dpi=750, height=6, width=7.5, units='in')