In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression, PoissonRegressor

from pbp.data import (
    CURRENT_SEASON,
    MODELS_PATH,
    POIS_KWARGS,
    LOGR_KWARGS,
    add_is_home,
    load_pbp_data
)

# Lift pandas' row and column display limits so frames are shown without truncation.
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, None)

In [None]:
# Seasons requested from the loader: 2017 up to and including CURRENT_SEASON.
seasons = range(2017, CURRENT_SEASON + 1)
pbp_copy = load_pbp_data(seasons)

In [None]:
# A play is identified by (play_id, game_id); keep only the first row seen for each pair.
play_key = ['play_id', 'game_id']
pbp = pbp_copy.drop_duplicates(subset=play_key)

In [None]:
# Play-by-play columns carried into the punt frame.
punt_cols = [
    'posteam', 'defteam', 'home_team', 'yardline_100',
    'punt_blocked', 'touchback', 'fumble_lost', 'punt_fair_catch',
    'kick_distance', 'return_yards', 'return_touchdown',
    'punter_player_id', 'punter_player_name', 'location',
]
# Features that depend only on the line of scrimmage and home/away status,
# i.e. nothing about how the kick itself turned out.
distance_features = [
    'is_offense_home',
    'yardline_pct', 'log_yardline_pct',
    'touchback_goodness', 'log_touchback_goodness',
    'yardline_tbg',
]


def _log_pct(pct):
    """Natural log of a fraction after clipping it into [0.01, 1.0]."""
    return np.log(pct.clip(0.01, 1.0))


punt_plays = pbp['play_type'] == 'punt'
punt_df = pbp.loc[punt_plays, punt_cols].reset_index(drop=True)

# presumably adds the 'is_offense_home' column to punt_df in place — confirm in pbp.data
add_is_home(punt_df)

# A touchback with no recorded kick distance is treated as travelling the full yardline_100.
touchback_without_distance = punt_df['touchback'].eq(1) & punt_df['kick_distance'].isna()
punt_df.loc[touchback_without_distance, 'kick_distance'] = punt_df['yardline_100']

# Line of scrimmage as a fraction of the field.
punt_df['yardline_pct'] = punt_df['yardline_100'].div(100)
punt_df['log_yardline_pct'] = _log_pct(punt_df['yardline_pct'])

# Kick distance as a fraction of the field.
punt_df['kick_distance_pct'] = punt_df['kick_distance'].div(100)
punt_df['log_kick_distance_pct'] = _log_pct(punt_df['kick_distance_pct'])

# Where the ball came down: yardline_100 minus the kick distance.
punt_df['punted_to_100'] = punt_df['yardline_100'] - punt_df['kick_distance']
punt_df['punted_to_pct'] = punt_df['punted_to_100'].div(100)
punt_df['log_punted_to_pct'] = _log_pct(punt_df['punted_to_pct'])

# Logistic curve in yardline_100, centred at 60 with a scale of 8.
punt_df['touchback_goodness'] = 1 / (1 + np.exp(-(punt_df['yardline_100'] - 60) / 8))
punt_df['log_touchback_goodness'] = np.log(punt_df['touchback_goodness'])
punt_df['yardline_tbg'] = punt_df['yardline_pct'] * punt_df['touchback_goodness']

In [None]:
# Boolean mask over `pbp` marking plays where the punt was blocked.
punt_blocks = pbp['punt_blocked'] == 1

# `punt_cols` already lists 'punt_blocked'. Selecting it a second time would give the
# frame two columns with the same label, and `punt_block_df['punt_blocked']` would then
# return a DataFrame rather than a Series. De-duplicate while keeping column order.
punt_block_cols = list(dict.fromkeys([*punt_cols, 'punt_blocked']))
punt_block_df = pbp.loc[punt_plays, punt_block_cols].reset_index(drop=True)

In [None]:
# Probability that a punt is blocked, fitted on every punt using the pre-kick features.
block_inputs = punt_df[distance_features]
block_target = punt_df['punt_blocked']
punt_block_model = LogisticRegression(**LOGR_KWARGS).fit(block_inputs, block_target)

In [None]:
# Unblocked punts with no missing values in any column.
# NOTE(review): dropna() looks at every column, including ones that are not model inputs
# (e.g. punter name, location) — confirm that discarding those rows is intended.
not_blocked = punt_df['punt_blocked'] == 0
punt_success_df = punt_df[not_blocked].dropna().reset_index(drop=True)

# "Fair catch" here means either an explicit fair catch, or a punt with zero return
# yards and no lost fumble.
explicit_fair_catch = punt_success_df['punt_fair_catch'] == 1
no_return = (punt_success_df['fumble_lost'] == 0) & (punt_success_df['return_yards'] == 0)
punt_success_df['is_fair_catch'] = (explicit_fair_catch | no_return).astype(int)

In [None]:
# Kick distance of unblocked punts as a Poisson regression on the pre-kick features.
# Negative recorded distances are raised to 0 before fitting.
distance_target = punt_success_df['kick_distance'].clip(0)
punt_distance_model = PoissonRegressor(**POIS_KWARGS).fit(
    punt_success_df[distance_features],
    distance_target,
)
# Disabled: variance of the prediction error, printed as a Rust constant.
# punt_distance_pred = punt_distance_model.predict(punt_success_df[distance_features])
# punt_distance_resid = (punt_distance_pred - punt_success_df['kick_distance'].clip(0)).var()
# print(f'const PUNT_DISTANCE_RESID: f32 = {punt_distance_resid:.4f};')

In [None]:
# Post-kick feature set: the pre-kick features plus how far the ball went and where it landed.
punted_features = [
    *distance_features,
    'kick_distance_pct',
    'log_kick_distance_pct',
    'punted_to_pct',
    'log_punted_to_pct',
]

# Probability that an unblocked punt ends in a touchback.
touchback_model = LogisticRegression(**LOGR_KWARGS).fit(
    punt_success_df[punted_features],
    punt_success_df['touchback'],
)

In [None]:
# Training rows: unblocked punts that did not end in a touchback.
not_touchback = punt_success_df['touchback'] == 0
fair_catch_df = punt_success_df[not_touchback].reset_index(drop=True)

# Target is 1 for an explicit fair catch or for a punt that simply was not returned
# (e.g. the ball was downed).
fair_catch_model = LogisticRegression(**LOGR_KWARGS).fit(
    fair_catch_df[punted_features],
    fair_catch_df['is_fair_catch'],
)

In [None]:
# Mask over punt_success_df: punts that were actually returned (no touchback, not a fair catch).
returned_punts = punt_success_df['touchback'].eq(0) & punt_success_df['is_fair_catch'].eq(0)
fumlost_df = punt_success_df[returned_punts].reset_index(drop=True)

# Probability that the return ends in a lost fumble.
fumlost_model = LogisticRegression(**LOGR_KWARGS).fit(
    fumlost_df[punted_features],
    fumlost_df['fumble_lost'],
)

In [None]:
# Returned punts on which the fumble was lost.
fumbles_lost = returned_punts & punt_success_df['fumble_lost'].eq(1)
fumble_return_td_df = punt_success_df[fumbles_lost].reset_index(drop=True)

# Probability of a return touchdown given a lost fumble on the return.
fumlost_rtd_model = LogisticRegression(**LOGR_KWARGS).fit(
    fumble_return_td_df[punted_features],
    fumble_return_td_df['return_touchdown'],
)

In [None]:
# Narrow the returned-punt mask to returns without a lost fumble.
# NOTE(review): this rebinds `returned_punts`; any cell that expects the un-narrowed mask
# (such as the one computing `fumbles_lost`) must run before this one.
no_fumble_lost = punt_success_df['fumble_lost'].eq(0)
returned_punts = returned_punts & no_fumble_lost
punt_return_td_df = punt_success_df[returned_punts].reset_index(drop=True)

# Probability that a clean return goes for a touchdown.
return_td_model = LogisticRegression(**LOGR_KWARGS).fit(
    punt_return_td_df[punted_features],
    punt_return_td_df['return_touchdown'],
)

In [None]:
# Returns that did not score: model the sign of the return first, then its size.
returned_nontd = returned_punts & punt_success_df['return_touchdown'].eq(0)
return_yards_df = punt_success_df[returned_nontd].reset_index(drop=True)

return_yards_df['is_pos_return_yards'] = return_yards_df['return_yards'].gt(0).astype(int)
is_pos_return_yards = return_yards_df['is_pos_return_yards'] == 1

# Probability that the return gains yardage.
is_pos_return_yards_model = LogisticRegression(**LOGR_KWARGS).fit(
    return_yards_df[punted_features],
    return_yards_df['is_pos_return_yards'],
)

# Size of a positive return; the target is shifted to (return_yards - 1) so it starts at 0.
gaining_returns = return_yards_df[is_pos_return_yards]
pos_return_yards_model = PoissonRegressor(**POIS_KWARGS).fit(
    gaining_returns[punted_features],
    gaining_returns['return_yards'] - 1,
)

# Size of a non-positive return; the target is (1 - return_yards).
# NOTE(review): this shift is not the mirror image of the positive case — confirm that
# whatever consumes these coefficients inverts it the same way.
non_gaining_returns = return_yards_df[~is_pos_return_yards]
neg_return_yards_model = PoissonRegressor(**POIS_KWARGS).fit(
    non_gaining_returns[punted_features],
    1 - non_gaining_returns['return_yards'],
)


In [None]:
def _render_punt_model(name, intercept, coefs, features=None, trailing_newline=False) -> str:
    """Render one Rust `pub fn <name>() -> PuntModel` constructor as text.

    Args:
        name: Name of the generated Rust function.
        intercept: Scalar intercept, written with three decimals.
        coefs: Flat sequence of coefficients, paired with `features` in order.
        features: Feature (struct field) names. When omitted, the notebook-level
            `punted_features` list is used.
        trailing_newline: When True the closing brace line ends with a newline.

    Returns:
        The function text, starting with a blank line.

    Raises:
        ValueError: If there are more coefficients than feature names; pairing
            them would silently drop coefficients.
    """
    feature_names = list(punted_features if features is None else features)
    coef_values = list(coefs)
    if len(coef_values) > len(feature_names):
        raise ValueError(
            f"{name}: got {len(coef_values)} coefficients "
            f"for {len(feature_names)} features"
        )

    lines = [
        f'\n    pub fn {name}() -> PuntModel {{',
        "        PuntModel {",
        f"            intercept: {intercept:.3f},",
    ]
    lines.extend(
        f"            {feature}: {coef:.3f},"
        for feature, coef in zip(feature_names, coef_values)
    )
    # Features the model was not fitted on are written as 0.0 so that the emitted
    # literal always lists every feature.
    lines.extend(
        f"            {feature}: 0.0,"
        for feature in feature_names[len(coef_values):]
    )
    lines.append("        }")
    lines.append("    }\n" if trailing_newline else "    }")
    return '\n'.join(lines)


def format_punt_block_coefs(model, name, features=None) -> str:
    """Format a model whose `intercept_` and `coef_` are indexed with `[0]`.

    The output ends with an extra newline after the closing brace. `features`
    defaults to the notebook-level `punted_features`; features beyond the fitted
    coefficients are emitted as 0.0.
    """
    return _render_punt_model(
        name, model.intercept_[0], model.coef_[0], features, trailing_newline=True
    )


def format_distance_coefs(model, name, features=None) -> str:
    """Format a model with a scalar `intercept_` and a flat `coef_`.

    `features` defaults to the notebook-level `punted_features`; features beyond
    the fitted coefficients are emitted as 0.0.
    """
    return _render_punt_model(name, model.intercept_, model.coef_, features)


def format_logr_coefs(model, name, features=None) -> str:
    """Format a model whose `intercept_` and `coef_` are indexed with `[0]`.

    Same layout as `format_punt_block_coefs` but without the trailing newline.
    """
    return _render_punt_model(name, model.intercept_[0], model.coef_[0], features)


def format_linr_coefs(model, name, features=None) -> str:
    """Format a model with a scalar `intercept_` and a flat `coef_`.

    Produces the same text as `format_distance_coefs`.
    """
    return _render_punt_model(name, model.intercept_, model.coef_, features)

In [None]:
# (formatter, fitted model, Rust function name) in the order the functions are emitted.
coef_sections = [
    (format_punt_block_coefs, punt_block_model, 'punt_block_coef'),
    (format_distance_coefs, punt_distance_model, 'punt_distance_coef'),
    (format_logr_coefs, touchback_model, 'touchback_coef'),
    (format_logr_coefs, fair_catch_model, 'fair_catch_coef'),
    (format_logr_coefs, fumlost_model, 'fumble_lost_coef'),
    (format_logr_coefs, fumlost_rtd_model, 'fumble_lost_return_td_coef'),
    (format_logr_coefs, return_td_model, 'punt_return_td_coef'),
    (format_logr_coefs, is_pos_return_yards_model, 'is_pos_punt_return_yards_coef'),
    (format_linr_coefs, pos_return_yards_model, 'pos_punt_return_yards_coef'),
    (format_linr_coefs, neg_return_yards_model, 'neg_punt_return_yards_coef'),
]

# The Rust file is a single `impl PuntModel` block holding one constructor per model.
model_str_parts = [
    "use crate::models::punt::PuntModel;\n",
    "impl PuntModel {",
    *(formatter(fitted, fn_name) for formatter, fitted, fn_name in coef_sections),
    "}",
]
model_str = "\n".join(model_str_parts)

# Overwrite the generated coefficient file under MODELS_PATH.
with open(f'{MODELS_PATH}/punt/coef.rs', 'w') as f:
    f.write(model_str)
