In [None]:
import math
import pandas as pd
from sklearn.linear_model import LogisticRegression, PoissonRegressor

from pbp.data import (
    CURRENT_SEASON,
    MODELS_PATH,
    LOGR_KWARGS,
    POIS_KWARGS,
    common_timeout_features,
    only_def_timeout_features,
    only_off_timeout_features,
    state_features,
    add_clock_stops,
    add_playcall_features,
    load_pbp_data,
    make_penalty_zs,
    make_proe_data,
)

# Show frames in full when they are displayed in the notebook: no row or column truncation.
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, None)

In [None]:
# Load the play-by-play seasons only once per kernel session. The raw frame lives in
# `pbp_copy`; the next cell rebuilds the working frame `pbp` from it, so re-running the
# notebook does not trigger another load.
try:
    pbp_copy
except NameError:
    # Only "name not defined yet" means the data still has to be loaded. Any other
    # exception (including KeyboardInterrupt) is left to propagate instead of being
    # swallowed by a bare `except`.
    pbp_copy = load_pbp_data(range(2017, CURRENT_SEASON + 1))

In [None]:
# Start from a fresh copy so the cached raw frame is never modified by this notebook.
pbp = pbp_copy.copy()

drive_keys = ['game_id','drive','posteam','defteam', 'posteam_score', 'defteam_score']

# Only rows where both teams and both scores are known can describe a drive.
teams_and_scores_known = (
    pbp[['posteam', 'defteam', 'posteam_score', 'defteam_score']].notna().all(axis=1)
)
drive_column_names = {
    'posteam': 'drive_offense',
    'defteam': 'drive_defense',
    'posteam_score': 'off_score',
    'defteam_score': 'def_score',
}
drive_teams = (
    pbp.loc[teams_and_scores_known, drive_keys]
    .groupby(drive_keys, as_index=False)
    .nth(0)
    .rename(columns=drive_column_names)
)

pbp['playcall'] = ''
# A drive can match several key combinations; keep one row per (play_id, game_id).
pbp = (
    pbp.merge(drive_teams, on=['game_id', 'drive'], how='left')
    .drop_duplicates(subset=['play_id','game_id'])
)

# Rows without a drive-level score fall back to their own play-level score.
for drive_score, play_score in (('off_score', 'posteam_score'), ('def_score', 'defteam_score')):
    pbp.loc[pbp[drive_score].isna(), drive_score] = pbp[play_score]


In [None]:
# Build the four PROE lookup tables from the play-by-play frame. They are merged into the
# model dataset further down on (posteam, season) / (defteam, season).
(offense_proes, defense_proes, offense_rz_proes, defense_rz_proes) = make_proe_data(pbp)

In [None]:
# Offensive penalty type -> (yardage class, loss-of-down flag).
# The first element is read downstream as `yards_class` ('5', '10', '15' or 'Varies') and
# the second as `loss_of_down`. Penalty types missing from this map are not flagged as
# `penalty_offense`.
OFFENSIVE_PENALTIES_MAP = {
    'Delay of Game': ('5', False),
    'False Start': ('5', False),
    'Illegal Formation': ('5', False),
    'Illegal Motion': ('5', False),
    'Illegal Shift': ('5', False),
    'Illegal Substitution': ('5', False),
    'Offensive 12 On-field': ('5', False),
    'Offensive Offside': ('5', False),
    'Offensive Too Many Men on Field': ('5', False),
    'Ineligible Downfield Kick': ('5', False),
    'Ineligible Downfield Pass': ('5', False),
    'Illegal Block Above the Waist': ('10', False),
    'Offensive Holding': ('10', False),
    'Offensive Pass Interference': ('10', False),
    'Illegal Use of Hands': ('10', False),
    'Tripping': ('15', False),
    'Chop Block': ('15', False),
    'Clipping': ('15', False),
    'Face Mask': ('15', False),
    'Illegal Blindside Block': ('15', False),
    'Illegal Crackback': ('15', False),
    'Illegal Peelback': ('15', False),
    'Low Block': ('15', False),
    'Lowering the Head to Initiate Contact': ('15', False),
    'Taunting': ('15', False),
    'Unnecessary Roughness': ('15', False),
    'Unsportsmanlike Conduct': ('15', False),
    'Disqualification': ('Varies', False),
    'Illegal Touch Kick': ('Varies', False),
    'Illegal Touch Pass': ('Varies', False),
    'Player Out of Bounds on Kick': ('Varies', False),
    'Player Out of Bounds on Punt': ('Varies', False),
    'Illegal Kick/Kicking Loose Ball': ('10', False),
    'Illegal Bat': ('10', False),
    # don't need yardage model for this
    'Illegal Forward Pass': ('5', True),
    'Intentional Grounding': ('Varies', True),
}

# Defensive penalty type -> (yardage class, automatic-first-down flag).
# The first element is read downstream as `yards_class` ('5', '15' or 'Varies') and the
# second as `automatic_first`. Penalty types missing from this map are not flagged as
# `penalty_defense`.
DEFENSIVE_PENALTIES_MAP = {
    # no automatic first. all of these are 5 yards, so no model
    'Defensive Offside': ('5', False),
    'Neutral Zone Infraction': ('5', False),
    'Defensive 12 On-field': ('5', False),
    'Defensive Too Many Men on Field': ('5', False),
    'Encroachment': ('5', False),
    'Defensive Delay of Game': ('5', False),
    'Delay of Game': ('5', False),
    'Illegal Substitution': ('5', False),
    'Running Into the Kicker': ('5', False),
    # automatic first downs. 5, 15 or "varies" yards
    'Illegal Formation': ('5', True),
    'Defensive Holding': ('5', True),
    'Illegal Contact': ('5', True),
    'Illegal Use of Hands': ('5', True),
    'Tripping': ('15', True),
    'Taunting': ('15', True),
    'Low Block': ('15', True),
    'Unsportsmanlike Conduct': ('15', True),
    'Roughing the Kicker': ('15', True),
    'Roughing the Passer': ('15', True),
    'Face Mask': ('15', True),
    'Horse Collar Tackle': ('15', True),
    'Leverage': ('15', True),
    'Leaping': ('15', True),
    'Lowering the Head to Initiate Contact': ('15', True),
    'Lowering the Head to Make Forcible Contact': ('15', True),
    'Unnecessary Roughness': ('15', True),
    'Defensive Pass Interference': ('Varies', True),
    'Disqualification': ('Varies', True),
}

# Flag penalties by the committing side, counting only types listed in that side's map.
offense_committed = pbp['penalty_team'] == pbp['posteam']
defense_committed = pbp['penalty_team'] == pbp['defteam']
pbp['penalty_offense'] = (
    offense_committed & pbp['penalty_type'].isin(OFFENSIVE_PENALTIES_MAP)
).astype(int)
pbp['penalty_defense'] = (
    defense_committed & pbp['penalty_type'].isin(DEFENSIVE_PENALTIES_MAP)
).astype(int)

# Reset the play-call label, then tag flagged plays (the defensive label is applied last).
pbp['playcall'] = ''
for flag_column, playcall_label in (
    ('penalty_offense', 'PENALTY_OFFENSE'),
    ('penalty_defense', 'PENALTY_DEFENSE'),
):
    pbp.loc[pbp[flag_column] == 1, 'playcall'] = playcall_label

In [None]:
# Per-play 0/1 indicators for a timeout charged to the home / away team.
pbp['home_timeout'] = 0
pbp['away_timeout'] = 0

# Attribute each timeout to a venue via the drive-level offense/defense teams:
# an offensive timeout belongs to the home team when the drive's offense is the home team, etc.
pbp.loc[pbp['timeout'] == 'OFFENSIVE_TIMEOUT', 'home_timeout'] = (pbp['drive_offense'] == pbp['home_team']).astype(int)
pbp.loc[pbp['timeout'] == 'DEFENSIVE_TIMEOUT', 'home_timeout'] = (pbp['drive_defense'] == pbp['home_team']).astype(int)

pbp.loc[pbp['timeout'] == 'OFFENSIVE_TIMEOUT', 'away_timeout'] = (pbp['drive_offense'] == pbp['away_team']).astype(int)
pbp.loc[pbp['timeout'] == 'DEFENSIVE_TIMEOUT', 'away_timeout'] = (pbp['drive_defense'] == pbp['away_team']).astype(int)

# Detect back-to-back timeout rows for the same team by comparing with the previous row.
# NOTE(review): shift(1) is applied to the whole frame, not per game, so the first row of a
# game is compared with the last row of the preceding game - presumably harmless if rows
# are ordered by game and play; TODO confirm.
pbp['home_timeout_prev'] = pbp['home_timeout'].shift(1)
pbp['away_timeout_prev'] = pbp['away_timeout'].shift(1)
pbp['duplicate_timeout'] = (
    ((pbp['home_timeout_prev'] == 1) & (pbp['home_timeout'] == 1))
    | ((pbp['away_timeout_prev'] == 1) & (pbp['away_timeout'] == 1))
)
# Zero the indicator on any row that directly follows a timeout row of the same venue, so a
# repeated record is not counted again in the running totals below.
pbp.loc[pbp['home_timeout_prev'] == 1, 'home_timeout'] = 0
pbp.loc[pbp['away_timeout_prev'] == 1, 'away_timeout'] = 0

# Relabel the duplicate rows so they are distinguishable from real timeouts.
pbp.loc[pbp['duplicate_timeout'], 'timeout'] = 'DUPLICATE'
pbp.loc[pbp['duplicate_timeout'], 'playcall'] = 'DUPLICATE_TIMEOUT'

# Running count of timeouts taken by each team within a (game, half).
pbp['total_home_timeouts'] = pbp.groupby(['game_id','home_team', 'game_half'])['home_timeout'].cumsum()
pbp['total_away_timeouts'] = pbp.groupby(['game_id','away_team', 'game_half'])['away_timeout'].cumsum()


In [None]:
# The return value is discarded, so this presumably adds its columns to `pbp` in place
# (e.g. the `clock_running` raw feature used below) - TODO confirm against pbp.data.
add_clock_stops(pbp)

In [None]:
# Timeouts left for each side of the ball, derived from the per-half running totals.
# Each side starts from 3 and is overwritten wherever the drive team is the home/away team.
for side_prefix, drive_role in (('off', 'drive_offense'), ('def', 'drive_defense')):
    left_column = f'{side_prefix}_timeouts_left'
    pbp[left_column] = 3
    for venue in ('home', 'away'):
        role_is_venue_team = pbp[drive_role] == pbp[f'{venue}_team']
        pbp.loc[role_is_venue_team, left_column] = 3 - pbp[f'total_{venue}_timeouts']

# Prefer the timeouts-remaining columns already in the data; fill gaps with the derived
# counts, then force integers within [0, 3].
pbp['off_timeouts_remaining'] = (
    pbp['posteam_timeouts_remaining']
    .combine_first(pbp['off_timeouts_left'])
    .astype(int)
    .clip(0, 3)
)
pbp['def_timeouts_remaining'] = (
    pbp['defteam_timeouts_remaining']
    .combine_first(pbp['def_timeouts_left'])
    .astype(int)
    .clip(0, 3)
)

def _int_label(value):
    """Convert a numeric down/quarter value to its integer string label.

    Strings and NaN are returned unchanged. The string check must come first:
    ``math.isnan`` raises ``TypeError`` for a ``str``, so testing NaN first made the
    string guard unreachable and caused this cell to crash when it was re-run on
    columns that had already been converted.
    """
    if isinstance(value, str) or math.isnan(value):
        return value
    return str(int(value))


# `down` and `qtr` become string categories (e.g. 1.0 -> '1') so that get_dummies later
# produces columns such as `down_1`; missing values stay NaN.
pbp['down'] = pbp['down'].apply(_int_label)
pbp['qtr'] = pbp['qtr'].apply(_int_label)

In [None]:
# Play-outcome columns carried into the modelling dataset (targets and post-play features).
responses = [
    'timeout',
    'play_type',
    # other
    'extra_point_attempt',
    'two_point_attempt',
    'location',
    'rushing_yards',
    'yards_gained',
    'touchdown',
    'fumble_lost',
    'qb_scramble',
    'sack',
    'receiver_player_id',
    'incomplete_pass',
    'complete_pass',
    'interception',
]

# Penalty-related columns, including the 0/1 flags derived earlier in this notebook.
penalty_responses = [
    'penalty',
    'penalty_type',
    'penalty_yards',
    'penalty_team',
    # doesn't have auto, will have to back it out
    'first_down',
    'penalty_offense',
    'penalty_defense',
]

# Game-state columns taken from the play-by-play frame; some of them are one-hot encoded
# when the dataset is built.
raw_features = [
    'down',
    'ydstogo',
    'goal_to_go',
    'qtr', 
    'clock_running',
    # 'quarter_seconds_remaining',
    'yardline_100',
    'off_score',
    'def_score',
    # 'play_clock',
    'game_seconds_remaining',
    'half_seconds_remaining',
    # 'wp', 
    'off_timeouts_remaining',
    'def_timeouts_remaining',
    # 'total', 'spread_line',
]

# Full column selection for the dataset: outcomes, team/season join keys, state, penalties.
cols = responses + ['posteam', 'defteam', 'season', 'home_team'] + raw_features + penalty_responses

In [None]:
# Team-season penalty tables; merged later on (posteam, season) and (defteam, season) to
# supply the `offense_penalty_z` / `defense_penalty_z` features.
off_penalties, def_penalties = make_penalty_zs(pbp)

In [None]:
# One-hot encode the categorical state columns, then attach the team-season PROE tables.
one_hot_columns = ['down', 'qtr', 'off_timeouts_remaining', 'def_timeouts_remaining']
dataset = pd.get_dummies(pbp[cols], columns=one_hot_columns)
for proe_table, team_column in (
    (offense_proes, 'posteam'),
    (defense_proes, 'defteam'),
    (offense_rz_proes, 'posteam'),
    (defense_rz_proes, 'defteam'),
):
    dataset = dataset.merge(proe_table, how='left', on=[team_column, 'season'])

add_playcall_features(dataset)

# Pre-snap penalties are penalties on 'no_play' rows; post-play ones sit on run/pass rows.
is_penalty = (dataset['penalty'] == 1)
is_presnap_penalty = is_penalty & (dataset['play_type'] == 'no_play')
# ignoring penalties after punts/fgs... seems fine
postsnap_penalty_playtypes = ['run','pass']
is_postplay_penalty = is_penalty & dataset['play_type'].isin(postsnap_penalty_playtypes)

off_penalty_filter = dataset['penalty_team'] == dataset['posteam']
def_penalty_filter = dataset['penalty_team'] == dataset['defteam']

# True where the penalty type is listed in the map of the side that committed it.
modelled_penalty_type = (
    (off_penalty_filter & dataset['penalty_type'].isin(OFFENSIVE_PENALTIES_MAP))
    | (def_penalty_filter & dataset['penalty_type'].isin(DEFENSIVE_PENALTIES_MAP))
)

presnap_penalties = (
    is_presnap_penalty
    & modelled_penalty_type
    & (dataset['play_type'] != 'qb_kneel')
)

postplay_penalties = is_postplay_penalty & modelled_penalty_type

In [None]:
# Feature lists: game state plus timeout features, then the penalty z-score features.
state_playcall_features = (
    list(state_features)
    + list(common_timeout_features)
    + list(only_off_timeout_features)
    + list(only_def_timeout_features)
)
playcall_features = state_playcall_features + [
    'offense_penalty_z',
    'defense_penalty_z',
    'off_def_penalty_z',
]

is_down = (dataset['down_1'] + dataset['down_2'] + dataset['down_3'] + dataset['down_4']) == 1
has_yards = dataset['penalty_yards'] != 0


def _attach_penalty_zs(plays):
    """Left-join the offense/defense penalty tables and add their interaction term."""
    merged = (
        plays
        .merge(off_penalties, how='left', on=['posteam','season'])
        .merge(def_penalties, how='left', on=['defteam','season'])
    )
    merged['off_def_penalty_z'] = merged['offense_penalty_z'] * merged['defense_penalty_z']
    return merged


# Pre-snap frame: penalised 'no_play' rows on a real down with non-zero yardage.
presnap_penalty_df = _attach_penalty_zs(dataset[has_yards & is_down & is_presnap_penalty])

# Post-play frame: every run/pass row, penalised or not.
postplay_penalty_df = _attach_penalty_zs(dataset[dataset['play_type'].isin(postsnap_penalty_playtypes)])

In [None]:
# Offensive pre-snap penalties with the yardage class and loss-of-down flag of their type.
op_df = presnap_penalty_df.loc[presnap_penalty_df['penalty_offense'] == 1].reset_index(drop=True)
op_df['yards_class'] = op_df['penalty_type'].map(lambda penalty: OFFENSIVE_PENALTIES_MAP[penalty][0])
op_df['loss_of_down'] = (
    op_df['penalty_type']
    .map(lambda penalty: OFFENSIVE_PENALTIES_MAP[penalty][1])
    .astype(int)
)

# Defensive pre-snap penalties with the yardage class and automatic-first flag of their type.
dp_df = presnap_penalty_df.loc[presnap_penalty_df['penalty_defense'] == 1].reset_index(drop=True)
dp_df['yards_class'] = dp_df['penalty_type'].map(lambda penalty: DEFENSIVE_PENALTIES_MAP[penalty][0])
dp_df['automatic_first'] = (
    dp_df['penalty_type']
    .map(lambda penalty: DEFENSIVE_PENALTIES_MAP[penalty][1])
    .astype(int)
)


In [None]:
# Offensive pre-snap penalties: probability that the penalty carries a loss of down
# (second element of its OFFENSIVE_PENALTIES_MAP entry), given the play-call features.
op_ld_model = LogisticRegression(**LOGR_KWARGS)
op_ld_model.fit(op_df[playcall_features], op_df['loss_of_down'])

In [None]:
# Defensive pre-snap penalties: probability that the penalty is an automatic first down
# (second element of its DEFENSIVE_PENALTIES_MAP entry), given the play-call features.
dp_af_model = LogisticRegression(**LOGR_KWARGS)
dp_af_model.fit(dp_df[playcall_features], dp_df['automatic_first'])

In [None]:
# Offensive penalties with a fixed yardage class and no loss of down, plus one indicator
# column per yardage class (`yards_<class>`).
keeps_down = op_df['loss_of_down'] == 0
fixed_yardage = op_df['yards_class'] != 'Varies'
op_yards_df = op_df.loc[keeps_down & fixed_yardage].reset_index(drop=True)
op_yards_dummies = pd.get_dummies(op_yards_df['yards_class'], prefix='yards')
for dummy_column in op_yards_dummies.columns:
    op_yards_df[dummy_column] = op_yards_dummies[dummy_column]

In [None]:
# Stage 1: is the offensive penalty a 5-yard one?
op_5yards_model = LogisticRegression(**LOGR_KWARGS)
op_5yards_model.fit(op_yards_df[playcall_features], op_yards_df['yards_5'])

# Stage 2: among the penalties that are not 5 yards, is it a 15-yard one?
yards_not5 = (op_yards_df['yards_5'] == 0)
op_not5_rows = op_yards_df.loc[yards_not5]
op_15yards_model = LogisticRegression(**LOGR_KWARGS)
op_15yards_model.fit(op_not5_rows[playcall_features], op_not5_rows['yards_15'])

In [None]:
# Defensive penalties that give an automatic first down, plus one indicator column per
# yardage class (`yards_<class>`).
gives_first_down = dp_df['automatic_first'] == 1
dp_yards_df = dp_df.loc[gives_first_down].reset_index(drop=True)
dp_yards_dummies = pd.get_dummies(dp_yards_df['yards_class'], prefix='yards')
for dummy_column in dp_yards_dummies.columns:
    dp_yards_df[dummy_column] = dp_yards_dummies[dummy_column]


In [None]:
# Stage 1: is the automatic-first defensive penalty a 5-yard one?
dp_5yards_model = LogisticRegression(**LOGR_KWARGS)
dp_5yards_model.fit(dp_yards_df[playcall_features], dp_yards_df['yards_5'])

# Stage 2: among the penalties that are not 5 yards, is it a 15-yard one?
yards_not5 = (dp_yards_df['yards_5'] == 0)
dp_not5_rows = dp_yards_df.loc[yards_not5]
dp_15_yards_model = LogisticRegression(**LOGR_KWARGS)
dp_15_yards_model.fit(dp_not5_rows[playcall_features], dp_not5_rows['yards_15'])

In [None]:
# op_varies_filter = op_df['yards_class'] == 'Varies'

# op_varies_model = PoissonRegressor(**RIDGE_KWARGS)
# op_varies_model.fit(op_df[op_varies_filter][playcall_features], op_df[op_varies_filter]['penalty_yards'])

In [None]:
# Defensive penalties whose yardage class is 'Varies' are modelled in three steps:
# (1) does the penalty move the ball to the 1-yard line or closer, (2) the expected
# yardage otherwise, and (3) the squared error of (2) as a variance estimate.
dp_varies_filter = dp_df['yards_class'] == 'Varies'

dp_varies_df = dp_df[dp_varies_filter].reset_index(drop=True)
# Distance to the goal line after the penalty yardage has been walked off.
dp_varies_df['end_yardline'] = dp_varies_df['yardline_100'] - dp_varies_df['penalty_yards']
dp_varies_df['to_1_yardline'] = (dp_varies_df['end_yardline'] <= 1).astype(int)

dp_varies_to_1_model = LogisticRegression(**LOGR_KWARGS)
dp_varies_to_1_model.fit(dp_varies_df[playcall_features], dp_varies_df['to_1_yardline'])
# In-sample probabilities; not used again in this notebook.
dp_varies_at_1_pred = dp_varies_to_1_model.predict_proba(dp_varies_df[playcall_features])[:,1]

# toss out half distance to the goal, as it will mess up our calculations
# NOTE(review): both bounds are strict, so if yardline_100 and penalty_yards are
# integer-valued no row can satisfy yardline/2 < yards < (yardline+1)/2 and this filter
# removes nothing - confirm whether inclusive bounds were intended.
is_half_distance = (dp_varies_df['yardline_100'] / 2 < dp_varies_df['penalty_yards']) & (dp_varies_df['penalty_yards'] < (dp_varies_df['yardline_100'] + 1) / 2)
dp_varies_not_1_df = dp_varies_df[(dp_varies_df['to_1_yardline'] == 0) & ~is_half_distance].reset_index(drop=True)

# Poisson mean model on (penalty_yards - 1), i.e. the target is shifted down by one yard.
dp_varies_model = PoissonRegressor(**POIS_KWARGS)
dp_varies_model.fit(dp_varies_not_1_df[playcall_features], dp_varies_not_1_df['penalty_yards'] - 1)
dp_varies_pred = dp_varies_model.predict(dp_varies_not_1_df[playcall_features])

# Second Poisson model fitted to the squared in-sample residuals of the mean model.
dp_varies_var_model = PoissonRegressor(**POIS_KWARGS)
dp_varies_var_model.fit(dp_varies_not_1_df[playcall_features], (dp_varies_not_1_df['penalty_yards'] - 1 - dp_varies_pred)**2)
dp_varies_var_pred = dp_varies_var_model.predict(dp_varies_not_1_df[playcall_features])

In [None]:
def format_presnap_logr_coefs(model: "LogisticRegression", name: str, features=None) -> str:
    """Render a fitted binary logistic regression as a Rust ``PenaltyModel`` constructor.

    Args:
        model: Fitted estimator; ``intercept_[0]`` and ``coef_[0]`` are read.
        name: Prefix of the generated Rust function, emitted as ``{name}_coef``.
        features: Struct field names, one per coefficient, in the column order used
            for fitting. Defaults to the notebook-level ``playcall_features``.

    Returns:
        Rust source for one ``pub fn`` returning a ``PenaltyModel`` literal, with every
        value formatted to three decimals. The text starts and ends with a newline.

    Raises:
        ValueError: If the number of coefficients differs from the number of field
            names. ``zip`` would otherwise silently drop the surplus and emit an
            incomplete or mislabelled struct.
    """
    field_names = list(playcall_features if features is None else features)
    weights = list(model.coef_[0])
    if len(weights) != len(field_names):
        raise ValueError(
            f'{name}: model has {len(weights)} coefficients but {len(field_names)} feature names were given'
        )

    lines = [
        f'\n    pub fn {name}_coef() -> PenaltyModel {{',
        '        PenaltyModel{',
        f'            intercept: {model.intercept_[0]:.3f},',
    ]
    lines.extend(f'            {field}: {weight:.3f},' for weight, field in zip(weights, field_names))
    lines.append('        }')
    lines.append('    }\n')
    return "\n".join(lines)


def format_presnap_linr_coefs(model: "PoissonRegressor", name: str, features=None) -> str:
    """Render a fitted Poisson regressor as a Rust ``PenaltyModel`` constructor.

    Unlike the logistic formatter, ``intercept_`` is read as a scalar and ``coef_`` as a
    flat sequence, and the generated text has no leading blank line.

    Args:
        model: Fitted estimator; ``intercept_`` and ``coef_`` are read.
        name: Prefix of the generated Rust function, emitted as ``{name}_coef``.
        features: Struct field names, one per coefficient, in the column order used
            for fitting. Defaults to the notebook-level ``playcall_features``.

    Returns:
        Rust source for one ``pub fn`` returning a ``PenaltyModel`` literal, with every
        value formatted to three decimals.

    Raises:
        ValueError: If the number of coefficients differs from the number of field
            names. ``zip`` would otherwise silently drop the surplus and emit an
            incomplete or mislabelled struct.
    """
    field_names = list(playcall_features if features is None else features)
    weights = list(model.coef_)
    if len(weights) != len(field_names):
        raise ValueError(
            f'{name}: model has {len(weights)} coefficients but {len(field_names)} feature names were given'
        )

    lines = [
        f'    pub fn {name}_coef() -> PenaltyModel {{',
        '        PenaltyModel{',
        f'            intercept: {model.intercept_:.3f},',
    ]
    lines.extend(f'            {field}: {weight:.3f},' for weight, field in zip(weights, field_names))
    lines.append('        }')
    lines.append('    }\n')
    return "\n".join(lines)


In [None]:
# Generate the Rust `impl PenaltyModel` block from the fitted pre-snap models and write it
# to the models directory. Export order matches the order of functions in the file.
presnap_logr_exports = [
    (op_ld_model, 'offensive_loss_of_down'),
    (dp_af_model, 'defensive_automatic_first'),
    (op_5yards_model, 'offensive_5_yards'),
    (op_15yards_model, 'offensive_15_yards'),
    (dp_5yards_model, 'defensive_5_yards'),
    (dp_15_yards_model, 'defensive_15_yards'),
    # TODO: later
    # print_linr_coefs(op_varies_model, 'offensive_yards_vary_coef')
    (dp_varies_to_1_model, 'defensive_yards_to_1'),
]
presnap_linr_exports = [
    (dp_varies_model, 'defensive_yards_vary'),
    (dp_varies_var_model, 'defensive_yards_vary_var'),
]

model_str_parts = [
    "use crate::models::penalty::PenaltyModel;\n",
    "impl PenaltyModel {",
    *(format_presnap_logr_coefs(fitted, fn_name) for fitted, fn_name in presnap_logr_exports),
    *(format_presnap_linr_coefs(fitted, fn_name) for fitted, fn_name in presnap_linr_exports),
    "}",
]

model_str = "\n".join(model_str_parts)

with open(f'{MODELS_PATH}/penalty/coef.rs', 'w') as coef_file:
    coef_file.write(model_str)

In [None]:
# Post-play penalty frames exclude touchdowns and lost fumbles.
include_ppp = (postplay_penalty_df['touchdown'] == 0) & (postplay_penalty_df['fumble_lost'] == 0)

# Rushes: run plays that are not QB scrambles; missing rushing yards count as 0.
is_non_scramble_run = (postplay_penalty_df['play_type'] == 'run') & (postplay_penalty_df['qb_scramble'] == 0)
post_rush_penalty_df = (
    postplay_penalty_df[include_ppp & is_non_scramble_run]
    .fillna({'rushing_yards': 0})
    .reset_index(drop=True)
)
tens_of_rushing_yards = post_rush_penalty_df['rushing_yards'] / 10
post_rush_penalty_df['rushing_yards_div10'] = tens_of_rushing_yards
post_rush_penalty_df['rushing_yards_div10_sq'] = tens_of_rushing_yards ** 2

# Passes: pass plays plus QB scrambles; missing yards gained count as 0.
is_pass_or_scramble = (postplay_penalty_df['play_type'] == 'pass') | (postplay_penalty_df['qb_scramble'] == 1)
post_pass_penalty_df = (
    postplay_penalty_df[include_ppp & is_pass_or_scramble]
    .fillna({
        'yards_gained': 0,
        # 'qb_scramble': 0,
        # 'sack': 0,
    })
    .reset_index(drop=True)
)
tens_of_yards_gained = post_pass_penalty_df['yards_gained'] / 10
post_pass_penalty_df['yards_gained_div10'] = tens_of_yards_gained
post_pass_penalty_df['yards_gained_div10_sq'] = tens_of_yards_gained ** 2

# An incompletion without a recorded receiver is treated as a throwaway; the remaining
# incompletions are targeted ones.
no_receiver_incompletion = (
    (post_pass_penalty_df['incomplete_pass'] == 1)
    & post_pass_penalty_df['receiver_player_id'].isna()
)
post_pass_penalty_df['throwaway'] = no_receiver_incompletion.astype(int)
post_pass_penalty_df['target_incomplete'] = post_pass_penalty_df['incomplete_pass'] - post_pass_penalty_df['throwaway']
post_pass_penalty_df['target_complete'] = post_pass_penalty_df['complete_pass']

In [None]:
# Post-play feature sets: the play-call features extended with the outcome of the play.
post_rush_penalty_features = playcall_features + [
    'rushing_yards_div10',
    'rushing_yards_div10_sq',
]
post_pass_penalty_features = playcall_features + [
    'qb_scramble',
    'sack',
    'throwaway',
    'target_incomplete',
    'target_complete',
    'interception',
    'yards_gained_div10',
    'yards_gained_div10_sq',
]
# The post-pass yardage models additionally condition on the loss-of-down flag.
post_pass_penalty_yards_features = post_pass_penalty_features + ['loss_of_down']

In [None]:
def format_postrush_logr_coefs(model: "LogisticRegression", name: str, features=None) -> str:
    """Render a fitted binary logistic regression as a Rust ``PostRushPenaltyModel`` constructor.

    Args:
        model: Fitted estimator; ``intercept_[0]`` and ``coef_[0]`` are read.
        name: Prefix of the generated Rust function, emitted as ``{name}_coef``.
        features: Struct field names, one per coefficient, in the column order used
            for fitting. Defaults to the notebook-level ``post_rush_penalty_features``.

    Returns:
        Rust source for one ``pub fn`` returning a ``PostRushPenaltyModel`` literal,
        with every value formatted to three decimals. The text starts and ends with a
        newline.

    Raises:
        ValueError: If the number of coefficients differs from the number of field
            names. ``zip`` would otherwise silently drop the surplus and emit an
            incomplete or mislabelled struct.
    """
    field_names = list(post_rush_penalty_features if features is None else features)
    weights = list(model.coef_[0])
    if len(weights) != len(field_names):
        raise ValueError(
            f'{name}: model has {len(weights)} coefficients but {len(field_names)} feature names were given'
        )

    lines = [
        f'\n    pub fn {name}_coef() -> PostRushPenaltyModel {{',
        '        PostRushPenaltyModel{',
        f'            intercept: {model.intercept_[0]:.3f},',
    ]
    lines.extend(f'            {field}: {weight:.3f},' for weight, field in zip(weights, field_names))
    lines.append('        }')
    lines.append('    }\n')
    return "\n".join(lines)

def format_postpass_logr_coefs(model: "LogisticRegression", name: str, features=None) -> str:
    """Render a fitted binary logistic regression as a Rust ``PostPassPenaltyModel`` constructor.

    The struct always lists every field in ``features``. A model fitted on a shorter
    prefix of that list (for example one trained without ``loss_of_down``) gets the
    remaining fields emitted as ``0.0``.

    Args:
        model: Fitted estimator; ``intercept_[0]`` and ``coef_[0]`` are read.
        name: Prefix of the generated Rust function, emitted as ``{name}_coef``.
        features: Struct field names in training-column order. Defaults to the
            notebook-level ``post_pass_penalty_yards_features``.

    Returns:
        Rust source for one ``pub fn`` returning a ``PostPassPenaltyModel`` literal.
        Fitted values are formatted to three decimals. The text starts and ends with a
        newline.

    Raises:
        ValueError: If the model has more coefficients than there are field names.
            ``zip`` would otherwise silently drop the surplus coefficients.
    """
    field_names = list(post_pass_penalty_yards_features if features is None else features)
    weights = list(model.coef_[0])
    if len(weights) > len(field_names):
        raise ValueError(
            f'{name}: model has {len(weights)} coefficients but only {len(field_names)} feature names were given'
        )

    lines = [
        f'\n    pub fn {name}_coef() -> PostPassPenaltyModel {{',
        '        PostPassPenaltyModel{',
        f'            intercept: {model.intercept_[0]:.3f},',
    ]
    lines.extend(f'            {field}: {weight:.3f},' for weight, field in zip(weights, field_names))
    # Fields the model was not trained on are written as explicit zeros.
    lines.extend(f'            {field}: 0.0,' for field in field_names[len(weights):])
    lines.append('        }')
    lines.append('    }\n')
    return "\n".join(lines)


In [None]:
# Shared design matrix: the post-rush frame is not modified in this cell, so the same
# feature selection feeds both "was there a penalty" models and their predictions.
post_rush_X = post_rush_penalty_df[post_rush_penalty_features]

# Offense - either 5, 10 or 15
is_postrush_off_penalty_model = LogisticRegression(**LOGR_KWARGS)
is_postrush_off_penalty_model.fit(post_rush_X, post_rush_penalty_df['penalty_offense'])
is_postrush_off_penalty_model_pred = is_postrush_off_penalty_model.predict_proba(post_rush_X)[:,1]

postrush_off_df = post_rush_penalty_df[post_rush_penalty_df['penalty_offense'] == 1]

is_off_postrush_penalty_df = postrush_off_df.reset_index(drop=True)
off_postrush_yards_class = is_off_postrush_penalty_df['penalty_type'].map(
    lambda penalty: OFFENSIVE_PENALTIES_MAP[penalty][0]
)
is_5_off_postrush_penalty_yards = (off_postrush_yards_class == '5').astype(int)
is_off_postrush_penalty_df['is_10_off_postrush_penalty_yards'] = (off_postrush_yards_class == '10').astype(int)

# Stage 1: is the offensive penalty 5 yards?
is_5_off_postrush_penalty_yards_model = LogisticRegression(**LOGR_KWARGS)
is_5_off_postrush_penalty_yards_model.fit(
    is_off_postrush_penalty_df[post_rush_penalty_features],
    is_5_off_postrush_penalty_yards,
)

# Stage 2: among the penalties that are not 5 yards, is it 10 yards?
is_not_5_oprpy = is_5_off_postrush_penalty_yards == 0
off_postrush_not_5_rows = is_off_postrush_penalty_df[is_not_5_oprpy]

is_10_off_postrush_penalty_yards_model = LogisticRegression(**LOGR_KWARGS)
is_10_off_postrush_penalty_yards_model.fit(
    off_postrush_not_5_rows[post_rush_penalty_features],
    off_postrush_not_5_rows['is_10_off_postrush_penalty_yards'],
)

# Defense - either 5 or 15
is_postrush_def_penalty_model = LogisticRegression(**LOGR_KWARGS)
is_postrush_def_penalty_model.fit(post_rush_X, post_rush_penalty_df['penalty_defense'])
is_postrush_def_penalty_model_pred = is_postrush_def_penalty_model.predict_proba(post_rush_X)[:,1]

is_def_postrush_penalty_df = post_rush_penalty_df[post_rush_penalty_df['penalty_defense'] == 1]
def_postrush_yards_class = is_def_postrush_penalty_df['penalty_type'].map(
    lambda penalty: DEFENSIVE_PENALTIES_MAP[penalty][0]
)
is_5_def_postrush_penalty_yards = (def_postrush_yards_class == '5').astype(int)

is_5_def_postrush_penalty_yards_model = LogisticRegression(**LOGR_KWARGS)
is_5_def_postrush_penalty_yards_model.fit(
    is_def_postrush_penalty_df[post_rush_penalty_features],
    is_5_def_postrush_penalty_yards,
)

In [None]:
# print('pub struct PostRushPenaltyModel {')
# print('    intercept: f32,')
# for f in post_rush_penalty_features:
#     print(f'    {f}: f32,')
# print('}')

In [None]:
# Generate the Rust `impl PostRushPenaltyModel` block from the fitted post-rush models and
# write it to the models directory. Export order matches the order of functions in the file.
postrush_exports = [
    (is_postrush_off_penalty_model, 'is_postrush_off_penalty'),
    (is_5_off_postrush_penalty_yards_model, 'is_5_postrush_off_penalty_yards'),
    (is_10_off_postrush_penalty_yards_model, 'is_10_postrush_off_penalty_yards'),
    (is_postrush_def_penalty_model, 'is_postrush_def_penalty'),
    (is_5_def_postrush_penalty_yards_model, 'is_5_postrush_def_penalty_yards'),
]

model_str_parts = [
    "use crate::models::post_rush_penalty::PostRushPenaltyModel;\n",
    "impl PostRushPenaltyModel {",
    *(format_postrush_logr_coefs(fitted, fn_name) for fitted, fn_name in postrush_exports),
    "}",
]

model_str = "\n".join(model_str_parts)

with open(f'{MODELS_PATH}/post_rush_penalty/coef.rs', 'w') as coef_file:
    coef_file.write(model_str)

In [None]:
# Post-pass penalty models: was there an offensive/defensive penalty, and if so which
# yardage class. The yardage models for offensive penalties also take `loss_of_down`.

# Offense - either 5, 10 or 15
is_postpass_off_penalty_model = LogisticRegression(**LOGR_KWARGS)
is_postpass_off_penalty_model.fit(post_pass_penalty_df[post_pass_penalty_features], post_pass_penalty_df['penalty_offense'])
is_postpass_off_penalty_model_pred = is_postpass_off_penalty_model.predict_proba(post_pass_penalty_df[post_pass_penalty_features])[:,1]

postpass_off_df = post_pass_penalty_df[post_pass_penalty_df['penalty_offense'] == 1]

is_off_postpass_penalty_df = post_pass_penalty_df[post_pass_penalty_df['penalty_offense'] == 1].reset_index(drop=True)
is_off_postpass_penalty_df['loss_of_down'] = is_off_postpass_penalty_df['penalty_type'].map(lambda x: OFFENSIVE_PENALTIES_MAP[x][1]).astype(int)
is_5_off_postpass_penalty_yards = is_off_postpass_penalty_df['penalty_type'].map(lambda x: OFFENSIVE_PENALTIES_MAP[x][0] == '5').astype(int)
is_10_off_postpass_penalty_yards = is_off_postpass_penalty_df['penalty_type'].map(lambda x: OFFENSIVE_PENALTIES_MAP[x][0] == '10').astype(int)

# NOTE(review): this model is fitted but is not among the models written by the export
# cell for post_pass_penalty/coef.rs - confirm whether it should be exported.
is_loss_of_down_model = LogisticRegression(**LOGR_KWARGS)
is_loss_of_down_model.fit(is_off_postpass_penalty_df[post_pass_penalty_features], is_off_postpass_penalty_df['loss_of_down'])

is_5_off_postpass_penalty_yards_model = LogisticRegression(**LOGR_KWARGS)
is_5_off_postpass_penalty_yards_model.fit(is_off_postpass_penalty_df[post_pass_penalty_yards_features], is_5_off_postpass_penalty_yards)

# NOTE(review): unlike the post-rush cell, the 10-yard model is fitted on all offensive
# penalties rather than only on those that are not 5 yards - confirm this matches how the
# consumer combines the 5- and 10-yard probabilities.
is_10_off_postpass_penalty_yards_model = LogisticRegression(**LOGR_KWARGS)
is_10_off_postpass_penalty_yards_model.fit(is_off_postpass_penalty_df[post_pass_penalty_yards_features], is_10_off_postpass_penalty_yards)

# Defense - either 5 or 15
is_postpass_def_penalty_model = LogisticRegression(**LOGR_KWARGS)
is_postpass_def_penalty_model.fit(post_pass_penalty_df[post_pass_penalty_features], post_pass_penalty_df['penalty_defense'])
is_postpass_def_penalty_model_pred = is_postpass_def_penalty_model.predict_proba(post_pass_penalty_df[post_pass_penalty_features])[:,1]

# Computed once; the original repeated these two statements verbatim.
is_def_postpass_penalty_df = post_pass_penalty_df[post_pass_penalty_df['penalty_defense'] == 1]
is_5_def_postpass_penalty_yards = is_def_postpass_penalty_df['penalty_type'].map(lambda x: DEFENSIVE_PENALTIES_MAP[x][0] == '5').astype(int)

is_5_def_postpass_penalty_yards_model = LogisticRegression(**LOGR_KWARGS)
is_5_def_postpass_penalty_yards_model.fit(is_def_postpass_penalty_df[post_pass_penalty_features], is_5_def_postpass_penalty_yards)

In [None]:
# is_postpass_off_penalty_model_pred.mean(), is_postpass_def_penalty_model_pred.mean()

In [None]:
# print('pub struct PostPassPenaltyModel {')
# print('    intercept: f32,')
# for f in post_pass_penalty_features:
#     print(f'    {f}: f32,')
# print('}')

In [None]:
# Generate the Rust `impl PostPassPenaltyModel` block from the fitted post-pass models and
# write it to the models directory. Export order matches the order of functions in the file.
postpass_exports = [
    (is_postpass_off_penalty_model, 'is_postpass_off_penalty'),
    (is_5_off_postpass_penalty_yards_model, 'is_5_postpass_off_penalty_yards'),
    (is_10_off_postpass_penalty_yards_model, 'is_10_postpass_off_penalty_yards'),
    (is_postpass_def_penalty_model, 'is_postpass_def_penalty'),
    (is_5_def_postpass_penalty_yards_model, 'is_5_postpass_def_penalty_yards'),
]

model_str_parts = [
    "use crate::models::post_pass_penalty::PostPassPenaltyModel;\n",
    "impl PostPassPenaltyModel {",
    *(format_postpass_logr_coefs(fitted, fn_name) for fitted, fn_name in postpass_exports),
    "}",
]

model_str = "\n".join(model_str_parts)

with open(f'{MODELS_PATH}/post_pass_penalty/coef.rs', 'w') as coef_file:
    coef_file.write(model_str)

In [None]:
# Share of post-rush rows flagged with an offensive / defensive penalty.
tuple(post_rush_penalty_df[flag].mean() for flag in ('penalty_offense', 'penalty_defense'))

# post_rush_penalty_df[post_rush_penalty_df['penalty_offense'] == 1]['penalty_yards'].mean(), post_rush_penalty_df[post_rush_penalty_df['penalty_defense'] == 1]['penalty_yards'].mean()

In [None]:
# Share of post-pass rows flagged with an offensive / defensive penalty.
tuple(post_pass_penalty_df[flag].mean() for flag in ('penalty_offense', 'penalty_defense'))

# post_pass_penalty_df[post_pass_penalty_df['penalty_offense'] == 1]['penalty_yards'].mean(), post_pass_penalty_df[post_pass_penalty_df['penalty_defense'] == 1]['penalty_yards'].mean()