In [None]:
import numpy as np
import pandas as pd
import nfl_data_py

from sklearn.linear_model import LogisticRegression
from pbp.data import (
    CURRENT_SEASON,
    BASELINES_PATH,
    MODELS_PATH,
    LOGR_KWARGS,
    add_is_home,
    load_pbp_data,
)

# Lift pandas' row/column display limits so frames are rendered without truncation.
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, None)

In [None]:
# First season requested from load_pbp_data; the range runs through CURRENT_SEASON inclusive.
FIRST_SEASON = 2017

pbp_copy = load_pbp_data(range(FIRST_SEASON, CURRENT_SEASON + 1))

# Keep a single row per (play_id, game_id) pair. drop_duplicates returns a new
# frame, so pbp_copy still holds the rows exactly as loaded.
pbp = pbp_copy.drop_duplicates(subset=['play_id', 'game_id'])

In [None]:
# Seasonal roster data for CURRENT_SEASON, fetched through nfl_data_py.
# NOTE(review): `rosters` is not referenced by any other cell visible here —
# confirm it is still needed before keeping this (network-bound) load.
rosters = nfl_data_py.import_seasonal_rosters([CURRENT_SEASON])

In [None]:
# Columns carried over from the play-by-play frame into the field-goal attempt frame.
fga_columns = [
    'kicker_player_id', 'kicker_player_name', 'kick_distance',
    'field_goal_result', 'fg_prob', 'roof', 'touchdown',
    'posteam', 'home_team', 'location',
]

# One row per field-goal attempt with `roof` one-hot encoded.
# dtype=float pins the indicators to 0.0/1.0 on every pandas version (recent
# releases default to bool), so the sums and products below are ordinary
# numeric arithmetic instead of relying on bool-operator semantics.
fga_df = pd.get_dummies(
    pbp.loc[pbp['field_goal_attempt'] == 1.0, fga_columns].reset_index(drop=True),
    columns=['roof'],
    dtype=float,
)

# get_dummies only emits a column for roof values that actually occur in the
# data; create any missing indicator as all-zero so the lookups below cannot
# raise KeyError on a narrower data window.
for roof_col in ('roof_dome', 'roof_closed', 'roof_open', 'roof_outdoors'):
    if roof_col not in fga_df.columns:
        fga_df[roof_col] = 0.0

# NOTE(review): presumably adds `is_offense_home` to fga_df in place (the
# setting feature list defined in a later cell expects that column) — confirm
# against pbp.data.add_is_home.
add_is_home(fga_df)

# Binary outcome flags derived from field_goal_result.
fga_df['fg_blocked'] = (fga_df['field_goal_result'] == 'blocked').astype(int)
fga_df['fg_good'] = (fga_df['field_goal_result'] == 'made').astype(int)

# Boolean row mask: True for attempts that were not blocked.
not_blocked = fga_df['fg_blocked'] == 0.0

# dome & closed are same thing
fga_df['roof_dome'] = fga_df['roof_dome'] + fga_df['roof_closed']

# Distance rescaled by 1/100, plus its square and cube for a cubic distance curve.
fga_df['distance_pct'] = fga_df['kick_distance'] / 100
fga_df['distance_2'] = fga_df['distance_pct'] ** 2
fga_df['distance_3'] = fga_df['distance_pct'] ** 3

# Roof x distance interaction terms.
fga_df['roof_open_dist'] = fga_df['roof_open'] * fga_df['distance_pct']
fga_df['roof_dome_dist'] = fga_df['roof_dome'] * fga_df['distance_pct']
fga_df['roof_outdoors_dist'] = fga_df['roof_outdoors'] * fga_df['distance_pct']

# Soft short/long split: a logistic weight that equals 0.5 at 40 yards
# (5-yard scale), approaches 1 for shorter kicks and 0 for longer ones.
fga_df['short_fg'] = 1 / (1.0 + np.exp((fga_df['kick_distance'] - 40) / 5))
fga_df['long_fg'] = 1.0 - fga_df['short_fg']

# Result minus fg_prob for each attempt, weighted into the short and long buckets.
fga_df['short_fg_oe'] = fga_df['short_fg'] * (fga_df['fg_good'] - fga_df['fg_prob'])
fga_df['long_fg_oe'] = fga_df['long_fg'] * (fga_df['fg_good'] - fga_df['fg_prob'])


In [None]:
# Game-setting features, grouped by kind. The concatenation order below is
# significant: it fixes the column order used for fitting and for pairing
# coefficients with names when the models are exported.
_distance_terms = ['distance_pct', 'distance_2', 'distance_3']
_roof_terms = ['roof_dome', 'roof_open', 'roof_outdoors']
_roof_distance_terms = ['roof_dome_dist', 'roof_open_dist', 'roof_outdoors_dist']

setting_fg_features = (
    ['is_offense_home']
    + _distance_terms
    + _roof_terms
    + _roof_distance_terms
)

# Kicker-skill features: short/long z-scores and their interactions with the
# distance polynomial, listed as short/long pairs per polynomial degree.
kicker_fg_features = [
    'short_fg_z', 'long_fg_z',
    'short_z_dist', 'long_z_dist',
    'short_z_dist_2', 'long_z_dist_2',
    'short_z_dist_3', 'long_z_dist_3',
]

# Full feature set: setting features first, then kicker features.
fg_features = setting_fg_features + kicker_fg_features


In [None]:
# Blocked-kick model: setting features only, fit on every attempt.
block_inputs = fga_df[setting_fg_features]
block_labels = fga_df['fg_blocked']

fg_block_model = LogisticRegression(**LOGR_KWARGS)
fg_block_model.fit(block_inputs, block_labels)

In [None]:
# Block-returned-for-touchdown model: restricted to blocked attempts, with the
# play's `touchdown` column as the label.
blocked_attempts = fga_df[~not_blocked]

fg_block_return_td_model = LogisticRegression(**LOGR_KWARGS)
fg_block_return_td_model.fit(blocked_attempts[setting_fg_features], blocked_attempts['touchdown'])


In [None]:
# Per-kicker mean over-expectation on non-blocked attempts, best kickers first.
#
# Group on kicker_player_id alone. Grouping on (id, name) produces one row per
# name spelling, which splits that kicker's sample across rows and makes the
# later merge on kicker_player_id duplicate his attempts in the training
# frame. The first name seen for each id is kept as the display name.
kicker_features = (
    fga_df[not_blocked]
    .groupby('kicker_player_id', as_index=False)
    .agg(
        kicker_player_name=('kicker_player_name', 'first'),
        short_fg_oe=('short_fg_oe', 'mean'),
        long_fg_oe=('long_fg_oe', 'mean'),
    )
    .sort_values(['short_fg_oe', 'long_fg_oe'], ascending=False)
    .reset_index(drop=True)
)

In [None]:
# Standardise each kicker's mean over-expectation across kickers and cap the
# result at +/-3 standard deviations.
for oe_col, z_col in (('short_fg_oe', 'short_fg_z'), ('long_fg_oe', 'long_fg_z')):
    oe_values = kicker_features[oe_col]
    kicker_features[z_col] = ((oe_values - oe_values.mean()) / oe_values.std()).clip(-3, 3)

# Attach the kicker z-scores to every non-blocked attempt.
kicker_z_scores = kicker_features[['kicker_player_id', 'short_fg_z', 'long_fg_z']]
fga_train = fga_df[not_blocked].merge(kicker_z_scores, on='kicker_player_id', how='left')

# Kicker-skill x distance-polynomial interactions: (new column, z-score, distance term).
interaction_specs = (
    ('short_z_dist', 'short_fg_z', 'distance_pct'),
    ('long_z_dist', 'long_fg_z', 'distance_pct'),
    ('short_z_dist_2', 'short_fg_z', 'distance_2'),
    ('long_z_dist_2', 'long_fg_z', 'distance_2'),
    ('short_z_dist_3', 'short_fg_z', 'distance_3'),
    ('long_z_dist_3', 'long_fg_z', 'distance_3'),
)
for new_col, z_col, dist_col in interaction_specs:
    fga_train[new_col] = fga_train[z_col] * fga_train[dist_col]

In [None]:
# Made-kick model: setting + kicker features, fit on non-blocked attempts.
train_inputs = fga_train[fg_features]
fg_made_model = LogisticRegression(**LOGR_KWARGS).fit(train_inputs, fga_train['fg_good'])

# In-sample probability of the positive class (second predict_proba column).
fga_train['ins_prob'] = fg_made_model.predict_proba(train_inputs)[:, 1]

In [None]:
def format_fg_block_coef(model: LogisticRegression, name: str) -> str:
    """Render a Rust ``FgModel`` constructor for a model fit on setting features only.

    Args:
        model: Fitted model; ``intercept_[0]`` and ``coef_[0]`` are read, and the
            coefficients are paired positionally with ``setting_fg_features``.
        name: Name of the generated Rust function, used verbatim.

    Returns:
        Rust source text for a ``pub fn`` returning an ``FgModel`` literal. Every
        field in ``kicker_fg_features`` is emitted as ``0.0``.
    """
    lines = [
        f"\n    pub fn {name}() -> FgModel {{",
        "        FgModel {",
        f'            intercept: {model.intercept_[0]:.3f},',
        *(
            f'            {feature}: {weight:.3f},'
            for feature, weight in zip(setting_fg_features, model.coef_[0])
        ),
        # Kicker fields are not part of this model, so they are written as zero.
        *(f'            {feature}: 0.0,' for feature in kicker_fg_features),
        "        }",
        "    }\n",
    ]
    return '\n'.join(lines)

def format_fg_good_coef(model_: LogisticRegression, name: str):
    """Render a Rust ``FgModel`` constructor for a model fit on all ``fg_features``.

    Args:
        model_: Fitted model; ``intercept_[0]`` and ``coef_[0]`` are read, and the
            coefficients are paired positionally with ``fg_features``.
        name: Base name of the generated Rust function; ``_coef`` is appended.

    Returns:
        Rust source text for a ``pub fn`` returning an ``FgModel`` literal.
    """
    field_lines = [
        f'            {feature}: {weight:.3f},'
        for feature, weight in zip(fg_features, model_.coef_[0])
    ]
    header = [
        f"\n    pub fn {name}_coef() -> FgModel {{",
        "        FgModel {",
        f'            intercept: {model_.intercept_[0]:.3f},',
    ]
    footer = ["        }", "    }\n"]
    return '\n'.join(header + field_lines + footer)
    

In [None]:
# Disabled helper kept from the original cell: prints a Rust struct definition
# with one f64 field per entry in fg_features.
# print("pub struct FgModel {")
# print("    pub intercept: f64,")
# for f in fg_features:
#     print(f"    pub {f}: f64,")
# print("}\n")

# Rust source: a `use` line, then an `impl FgModel` block holding one
# constructor per fitted model.
model_str_parts = [
    "use crate::models::field_goals::FgModel;\n",
    "impl FgModel {",
    format_fg_block_coef(fg_block_model, "is_fg_blocked_coef"),
    format_fg_block_coef(fg_block_return_td_model, "is_fg_block_returned_for_td_coef"),
    format_fg_good_coef(fg_made_model, "is_fg_good"),
    "}",
]

model_str = '\n'.join(model_str_parts)

# Overwrite the generated coefficient file under MODELS_PATH.
with open(f"{MODELS_PATH}/field_goals/coef.rs", 'w') as coef_file:
    coef_file.write(model_str)

In [None]:
# Spot check: outcome, in-sample probability, fg_prob and the kicker z-scores
# side by side for the first few training rows.
preview_columns = ['fg_good', 'ins_prob', 'fg_prob', 'short_fg_z', 'long_fg_z']
fga_train[preview_columns].head()

In [None]:
# Kicker baseline table: drop the raw over-expectation means, expose the id as
# `player_id`, and round both z-scores to two decimals.
write_kickers = (
    kicker_features
    .drop(columns=['short_fg_oe', 'long_fg_oe'])
    .rename(columns={'kicker_player_id': 'player_id'})
    .round({'short_fg_z': 2, 'long_fg_z': 2})
)

# Written without the index column.
write_kickers.to_csv(f"{BASELINES_PATH}/kickers.csv", index=False)