In [13]:
import sys
sys.path.insert(0, '..')
import utils.game_utils as gu

import math
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


def get_previous_record(year, week, team, all_games_df):
    """Return the most recent game `team` played in `year` before `week`.

    Args:
        year: Season to search; compared against the 'year' column.
        week: Only rows whose 'week' is strictly less than this are considered.
        team: Value matched against the 'team' column.
        all_games_df: DataFrame with at least 'year', 'week' and 'team' columns.

    Returns:
        A one-row DataFrame holding the matching game with the highest week,
        or None when the team has no earlier game that season.
    """
    earlier = all_games_df[(all_games_df['year'] == year)
                           & (all_games_df['week'] < week)
                           & (all_games_df['team'] == team)]
    if len(earlier) < 1:
        return None

    # Select the latest week explicitly rather than trusting the row order of
    # the source. The stable sort keeps the original relative order, so input
    # that is already week-sorted yields exactly the same row as before.
    return earlier.sort_values('week', kind='stable').tail(1)


def get_all_previous_records(year, week, team, all_games_df):
    """Return every game `team` played in `year` before `week`.

    Rows come back in their original order. The result is None (rather than
    an empty frame) when nothing matches.
    """
    same_year = all_games_df['year'] == year
    earlier_week = all_games_df['week'] < week
    same_team = all_games_df['team'] == team

    matches = all_games_df[same_year & earlier_week & same_team]
    return matches if len(matches) >= 1 else None

def trim_df(df, fields=None):
    """Project `df` onto the identifying columns plus a set of extra fields.

    The result always starts with 'year', 'week', 'team_win' and 'team',
    followed by `fields` in the order given. When `fields` is None the
    team-side before/after aggregate columns listed below are used.
    """
    default_fields = (
        'team_wins_before',
        'team_losses_before', 'team_ties_before', 'team_wins_after',
        'team_losses_after', 'team_ties_after', 'team_record_total_before',
        'team_record_total_after', 'team_record_normal_before',
        'team_record_normal_after', 'team_schedule_strength_before',
        'team_schedule_strength_after',
    )
    extra = default_fields if fields is None else fields
    key_columns = ['year', 'week', 'team_win', 'team']
    return df[[*key_columns, *extra]]


# Games table for the whole notebook, loaded once from CSV (path is relative
# to the notebook's working directory). The helper functions in this notebook
# filter a frame of this shape on its 'year', 'week' and 'team' columns.
all_games_df = pd.read_csv('../../data/games/all_games_with_data.csv')


In [65]:
def get_df_with_aggregates(index, all_games_df, side='team'):
    """Compute the running season aggregates for one game row and store them.

    Looks up the game at index label ``index``, takes the ``*_after`` values
    of the team's most recent earlier game that season (found through
    ``get_previous_record``) as this game's ``*_before`` values, applies this
    game's result, and writes the fourteen ``{side}_*`` aggregate columns back
    into ``all_games_df`` in place.

    Earlier games of the season should already have been processed, because
    their ``*_after`` columns are read here.

    Args:
        index: Index label of the game row in ``all_games_df`` (callers pass
            values taken from ``df.index``).
        all_games_df: Games table; mutated in place.
        side: Column prefix to aggregate for, ``'team'`` (default) or
            ``'opponent'``.

    Returns:
        The updated row, i.e. ``all_games_df.loc[index]``.
    """
    SIDE = side  # can be opponent
    OTHER_SIDE = 'opponent' if SIDE == 'team' else 'team'
    # NOTE(review): only the default side='team' is exercised by the visible
    # calls; get_previous_record always filters on the 'team' column, so the
    # side='opponent' path should be verified before it is relied on.

    # `index` is a label, so read with .loc to stay consistent with the
    # label-based write at the end (a positional .iloc read could target a
    # different row whenever the index is not 0..n-1).
    game_df = all_games_df.loc[index]
    year = game_df['year']
    week = game_df['week']
    team = game_df[SIDE]
    opp_team = game_df[OTHER_SIDE]

    previous_df = get_previous_record(year, week, team, all_games_df)
    opp_previous_df = get_previous_record(year, week, opp_team, all_games_df)

    # Defaults describe a team with no earlier game this season.
    wins_before = 0
    losses_before = 0
    ties_before = 0

    wins_after = 0
    losses_after = 0
    ties_after = 0

    # todo: schedule strength is only carried forward, never computed
    record_total_before = 0
    record_normal_before = 0
    schedule_strength_before = 0
    schedule_strength_after = 0
    # accumulated normalized records of all opponents
    opprec_normal_before = 0
    opprec_normal_after = 1.0

    ## INFO FROM THIS GAME
    win = bool(game_df[f'{SIDE}_win'])
    # no need for SIDE, just see if both scores are the same
    tie = game_df['team_score'] == game_df['opponent_score']
    loss = not win and not tie

    def get_val_or(df, field):
        # Value of `field` in the first row of `df`; missing values count as 0.
        # pd.isna also copes with None / non-float cells, unlike np.isnan.
        val = df.iloc[0][field]
        if pd.isna(val):
            return 0
        return val

    def get_normalized(x, low, high):
        # Min-max scale x from [low, high] onto [0, 1].
        return (x - low) / (high - low)

    def get_opprec_normal_after():
        # With no earlier opponent game to draw on, keep the 1.0 default.
        if week == 1 or opp_previous_df is None:
            return 1.0

        pre_week = get_val_or(opp_previous_df, 'week')
        pre_opprec_after = get_val_or(opp_previous_df,
                                      f'{SIDE}_opprec_normal_after')
        pre_trn_after = get_val_or(opp_previous_df,
                                   f'{SIDE}_record_normal_after')

        # Mean of the opponent's own normalized record and its accumulated
        # opponents' record, both weighted by the same week number.
        new_opprec = ((pre_opprec_after * pre_week) +
                      (pre_trn_after * pre_week)) / (2 * pre_week)

        return new_opprec

    ## DATA FROM PREVIOUS GAME
    if previous_df is not None:
        # Best effort: if a carry-over lookup fails (e.g. a column is missing)
        # report it and continue with whatever was gathered so far.
        try:
            wins_before = get_val_or(previous_df, f'{SIDE}_wins_after')
            losses_before = get_val_or(previous_df, f'{SIDE}_losses_after')
            ties_before = get_val_or(previous_df, f'{SIDE}_ties_after')
            record_total_before = get_val_or(previous_df,
                                             f'{SIDE}_record_total_after')
            record_normal_before = get_val_or(previous_df,
                                              f'{SIDE}_record_normal_after')
            schedule_strength_before = get_val_or(
                previous_df, f'{SIDE}_schedule_strength_after')
            opprec_normal_before = get_val_or(previous_df,
                                              f'{SIDE}_opprec_normal_after')

            wins_after = wins_before
            losses_after = losses_before
            ties_after = ties_before
            schedule_strength_after = schedule_strength_before
            opprec_normal_after = get_opprec_normal_after()

        except Exception as e:
            print(e)

    ## CALCULATE NEW DATA (after)
    if win:
        wins_after += 1
    if tie:
        ties_after += 1
    if loss:
        losses_after += 1

    ## CALCULATE RECORD NORMAL
    record_total_after = wins_after - losses_after  # eg: +2 or -7 etc.
    # Scaled against the week number, i.e. the range [-week, +week].
    record_normal_after = get_normalized(record_total_after, week * -1, week)

    # todo: maybe we should be authoring is_tie as well?

    # .loc (not .at) because several columns are assigned at once; .at is a
    # single-scalar accessor.
    all_games_df.loc[index, [
        f'{SIDE}_wins_before',
        f'{SIDE}_wins_after',
        f'{SIDE}_losses_before',
        f'{SIDE}_losses_after',
        f'{SIDE}_ties_before',
        f'{SIDE}_ties_after',
        f'{SIDE}_record_total_before',
        f'{SIDE}_record_total_after',
        f'{SIDE}_record_normal_before',
        f'{SIDE}_record_normal_after',
        f'{SIDE}_schedule_strength_before',
        f'{SIDE}_schedule_strength_after',
        f'{SIDE}_opprec_normal_before',
        f'{SIDE}_opprec_normal_after',
    ]] = [
        wins_before, wins_after, losses_before, losses_after, ties_before,
        ties_after, record_total_before, record_total_after,
        record_normal_before, record_normal_after, schedule_strength_before,
        schedule_strength_after, opprec_normal_before,
        opprec_normal_after
    ]

    return all_games_df.loc[index]




def get_team_df(team, year, all_df):
    """Return the rows of `all_df` for `team` in season `year`."""
    is_team = all_df['team'] == team
    is_year = all_df['year'] == year
    return all_df[is_team & is_year]

def get_year_df(year, all_df):
    """Return the rows of `all_df` whose 'year' equals `year`."""
    in_season = all_df['year'].eq(year)
    return all_df[in_season]

def get_year_up_to_week_df(year, week, all_df):
    """Return the rows of season `year` with 'week' at or before `week`."""
    in_season = all_df['year'].eq(year)
    not_after = all_df['week'].le(week)
    return all_df[in_season & not_after]


## - - - - - - - - - - - - - - - - - - - -- - - -- - - - - - - -
## C O N F I G
## - - - - - - - - - - - - - - - - - - - -- - - -- - - - - - - -
# Season whose games get their aggregates filled in below.
YEAR = 2018
# Week cut-off; only referenced by the commented-out
# get_year_up_to_week_df alternatives further down.
UP_TO_WEEK = 16
## ADD NEW COLUMNS TO ALL GAMES AND CREATE NEW ALL_GAMES_PLUS_DF (agp_df)
# new_col_df has column names but no rows; joining it onto all_games_df
# yields a frame with the original data plus these (initially unfilled)
# aggregate columns.
new_col_df = pd.DataFrame(columns=[
    'team_wins_before', 'team_losses_before', 'team_ties_before',
    'team_record_total_before', 'team_record_normal_before', 'team_wins_after',
    'team_losses_after', 'team_ties_after', 'team_record_total_after',
    'team_record_normal_after', 'team_schedule_strength_before',
    'team_schedule_strength_after', 'team_opprec_normal_before',
    'team_opprec_normal_after'
])
agp_df = all_games_df.join(new_col_df, how="outer")
# Selected here only to obtain the index labels of the season's games.
year_df = get_year_df(YEAR, agp_df)
# year_df = get_year_up_to_week_df(YEAR, UP_TO_WEEK, agp_df)

# Fill the aggregate columns of agp_df in place, one game at a time. Each call
# reads the *_after columns of earlier-week rows, so this presumably relies on
# year_df listing games in ascending week order -- TODO confirm against the CSV.
for index in year_df.index:
    get_df_with_aggregates(index, agp_df)

# Columns shown (after the fixed year/week/team_win/team columns) by trim_df.
show_fields = [
    'opponent',
    'team_wins_after',
    'team_losses_after',
    'team_ties_after',
    'team_record_total_after',
    'team_record_normal_after',
    'team_opprec_normal_after'
]

# Re-select the season so year_df reflects the values just written to agp_df.
year_df = get_year_df(YEAR, agp_df)
# year_df = get_year_up_to_week_df(YEAR, UP_TO_WEEK, agp_df)
# trim_df(year_df, show_fields)

# Display the season for one team; the name is looked up in gu.TEAM_NAME.
team_df = year_df[year_df['team'] == gu.TEAM_NAME['Saints']]
trim_df(team_df, show_fields)

Unnamed: 0,year,week,team_win,team,opponent,team_wins_after,team_losses_after,team_ties_after,team_record_total_after,team_record_normal_after,team_opprec_normal_after
4106,2018,1,0,New Orleans Saints,Tampa Bay Buccaneers,0,1,0,-1,0.0,1.0
4144,2018,2,1,New Orleans Saints,Cleveland Browns,1,1,0,0,0.5,0.75
4165,2018,3,1,New Orleans Saints,Atlanta Falcons,2,1,0,1,0.666667,0.75
4217,2018,4,1,New Orleans Saints,New York Giants,3,1,0,2,0.75,0.291667
4250,2018,5,1,New Orleans Saints,Washington Redskins,4,1,0,3,0.8,0.770833
4301,2018,7,1,New Orleans Saints,Baltimore Ravens,5,1,0,4,0.785714,0.577083
4335,2018,8,1,New Orleans Saints,Minnesota Vikings,6,1,0,5,0.8125,0.585268
4358,2018,9,1,New Orleans Saints,Los Angeles Rams,7,1,0,6,0.833333,0.696875
4367,2018,10,1,New Orleans Saints,Cincinnati Bengals,8,1,0,7,0.85,0.554427
4412,2018,11,1,New Orleans Saints,Philadelphia Eagles,9,1,0,8,0.863636,0.457825


In [68]:
# Same trimmed season view, this time for the team keyed 'Redskins'.
team_df = year_df.loc[year_df['team'].eq(gu.TEAM_NAME['Redskins'])]
trim_df(team_df, show_fields)

Unnamed: 0,year,week,team_win,team,opponent,team_wins_after,team_losses_after,team_ties_after,team_record_total_after,team_record_normal_after,team_opprec_normal_after
4121,2018,1,1,Washington Redskins,Arizona Cardinals,1,0,0,1,1.0,1.0
4134,2018,2,0,Washington Redskins,Indianapolis Colts,1,1,0,0,0.5,0.5
4166,2018,3,1,Washington Redskins,Green Bay Packers,2,1,0,1,0.666667,0.875
4251,2018,5,0,Washington Redskins,New Orleans Saints,2,2,0,0,0.5,0.520833
4258,2018,6,1,Washington Redskins,Carolina Panthers,3,2,0,1,0.583333,0.589583
4302,2018,7,1,Washington Redskins,Dallas Cowboys,4,2,0,2,0.642857,0.613542
4321,2018,8,1,Washington Redskins,New York Giants,5,2,0,3,0.6875,0.290179
4344,2018,9,0,Washington Redskins,Atlanta Falcons,5,3,0,2,0.611111,0.384077
4371,2018,10,1,Washington Redskins,Tampa Bay Buccaneers,6,3,0,3,0.65,0.535925
4396,2018,11,0,Washington Redskins,Houston Texans,6,4,0,2,0.590909,0.607664
