In [1]:
from sportsipy.nfl.boxscore import Boxscores, Boxscore
from sportsipy.nfl.schedule import Schedule, Game
from sportsipy.nfl.teams import Teams
from pathlib import Path
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)

In [2]:
# The game_Data function is extracting game statistics for each game. It loops through each game and each week grabbing the statistics referenced.
# This function creates the foundation for our final dataset.
def game_data(game_df,game_stats):
    try:
        # Creates a dataframe for the away_team and the home_team. Sets column names to be exact matches between the two.
        away_team_df = game_df[['away_name', 'away_abbr', 'away_score']].rename(columns = {'away_name': 'team_name', 'away_abbr': 'team_abbr', 'away_score': 'score'})
        home_team_df = game_df[['home_name','home_abbr', 'home_score']].rename(columns = {'home_name': 'team_name', 'home_abbr': 'team_abbr', 'home_score': 'score'})
        try:
            if game_df.loc[0,'away_score'] > game_df.loc[0,'home_score']:
                away_team_df = pd.merge(away_team_df, pd.DataFrame({'game_won' : [1], 'game_lost' : [0]}),left_index = True, right_index = True)
                home_team_df = pd.merge(home_team_df, pd.DataFrame({'game_won' : [0], 'game_lost' : [1]}),left_index = True, right_index = True)
            elif game_df.loc[0,'away_score'] < game_df.loc[0,'home_score']:
                away_team_df = pd.merge(away_team_df, pd.DataFrame({'game_won' : [0], 'game_lost' : [1]}),left_index = True, right_index = True)
                home_team_df = pd.merge(home_team_df, pd.DataFrame({'game_won' : [1], 'game_lost' : [0]}),left_index = True, right_index = True)
            else: 
                away_team_df = pd.merge(away_team_df, pd.DataFrame({'game_won' : [0], 'game_lost' : [0]}),left_index = True, right_index = True)
                home_team_df = pd.merge(home_team_df, pd.DataFrame({'game_won' : [0], 'game_lost' : [0]}),left_index = True, right_index = True)
        except TypeError:
                away_team_df = pd.merge(away_team_df, pd.DataFrame({'game_won' : [np.nan], 'game_lost' : [np.nan]}),left_index = True, right_index = True)
                home_team_df = pd.merge(home_team_df, pd.DataFrame({'game_won' : [np.nan], 'game_lost' : [np.nan]}),left_index = True, right_index = True)        

        # Creating the away_team & home_team stats dataframe. Grabbing the selected stats and then renaming them to match home == away dataframe column names.
        away_stats_df = game_stats.dataframe[['away_first_downs', 'away_fourth_down_attempts',
               'away_fourth_down_conversions', 'away_fumbles', 'away_fumbles_lost',
               'away_interceptions', 'away_net_pass_yards', 'away_pass_attempts',
               'away_pass_completions', 'away_pass_touchdowns', 'away_pass_yards',
               'away_penalties', 'away_points', 'away_rush_attempts',
               'away_rush_touchdowns', 'away_rush_yards', 'away_third_down_attempts',
               'away_third_down_conversions', 'away_time_of_possession',
               'away_times_sacked', 'away_total_yards', 'away_turnovers',
               'away_yards_from_penalties', 'away_yards_lost_from_sacks']].reset_index().drop(columns ='index').rename(columns = {
               'away_first_downs': 'first_downs', 'away_fourth_down_attempts':'fourth_down_attempts',
               'away_fourth_down_conversions':'fourth_down_conversions' , 'away_fumbles': 'fumbles', 'away_fumbles_lost': 'fumbles_lost',
               'away_interceptions': 'interceptions', 'away_net_pass_yards':'net_pass_yards' , 'away_pass_attempts': 'pass_attempts',
               'away_pass_completions':'pass_completions' , 'away_pass_touchdowns': 'pass_touchdowns', 'away_pass_yards': 'pass_yards',
               'away_penalties': 'penalties', 'away_points': 'points', 'away_rush_attempts': 'rush_attempts',
               'away_rush_touchdowns': 'rush_touchdowns', 'away_rush_yards': 'rush_yards', 'away_third_down_attempts': 'third_down_attempts',
               'away_third_down_conversions': 'third_down_conversions', 'away_time_of_possession': 'time_of_possession',
               'away_times_sacked': 'times_sacked', 'away_total_yards': 'total_yards', 'away_turnovers': 'turnovers',
               'away_yards_from_penalties':'yards_from_penalties', 'away_yards_lost_from_sacks': 'yards_lost_from_sacks'})

        home_stats_df = game_stats.dataframe[['home_first_downs', 'home_fourth_down_attempts',
               'home_fourth_down_conversions', 'home_fumbles', 'home_fumbles_lost',
               'home_interceptions', 'home_net_pass_yards', 'home_pass_attempts',
               'home_pass_completions', 'home_pass_touchdowns', 'home_pass_yards',
               'home_penalties', 'home_points', 'home_rush_attempts',
               'home_rush_touchdowns', 'home_rush_yards', 'home_third_down_attempts',
               'home_third_down_conversions', 'home_time_of_possession',
               'home_times_sacked', 'home_total_yards', 'home_turnovers',
               'home_yards_from_penalties', 'home_yards_lost_from_sacks']].reset_index().drop(columns = 'index').rename(columns = {
               'home_first_downs': 'first_downs', 'home_fourth_down_attempts':'fourth_down_attempts',
               'home_fourth_down_conversions':'fourth_down_conversions' , 'home_fumbles': 'fumbles', 'home_fumbles_lost': 'fumbles_lost',
               'home_interceptions': 'interceptions', 'home_net_pass_yards':'net_pass_yards' , 'home_pass_attempts': 'pass_attempts',
               'home_pass_completions':'pass_completions' , 'home_pass_touchdowns': 'pass_touchdowns', 'home_pass_yards': 'pass_yards',
               'home_penalties': 'penalties', 'home_points': 'points', 'home_rush_attempts': 'rush_attempts',
               'home_rush_touchdowns': 'rush_touchdowns', 'home_rush_yards': 'rush_yards', 'home_third_down_attempts': 'third_down_attempts',
               'home_third_down_conversions': 'third_down_conversions', 'home_time_of_possession': 'time_of_possession',
               'home_times_sacked': 'times_sacked', 'home_total_yards': 'total_yards', 'home_turnovers': 'turnovers',
               'home_yards_from_penalties':'yards_from_penalties', 'home_yards_lost_from_sacks': 'yards_lost_from_sacks'})
        
        # Merge the team_df & stats_df for both home & away teams. Set the left_index & right_index to True so that both dataframes merge on the same indices. 
        away_team_df = pd.merge(away_team_df, away_stats_df,left_index = True, right_index = True)
        home_team_df = pd.merge(home_team_df, home_stats_df,left_index = True, right_index = True)
        try:
            # Converting time_of_possession from MM:SS format into seconds(int). 
            away_team_df['time_of_possession'] = (int(away_team_df['time_of_possession'].loc[0][0:2]) * 60) + int(away_team_df['time_of_possession'].loc[0][3:5])
            home_team_df['time_of_possession'] = (int(home_team_df['time_of_possession'].loc[0][0:2]) * 60) + int(home_team_df['time_of_possession'].loc[0][3:5])
        except TypeError:
            away_team_df['time_of_possession'] = np.nan
            home_team_df['time_of_possession'] = np.nan
    except TypeError:
        away_team_df = pd.DataFrame()
        home_team_df = pd.DataFrame()
    return away_team_df, home_team_df


def game_data_up_to_week(weeks,year):
    weeks_games_df = pd.DataFrame()
    for w in range(len(weeks)):
        date_string = str(weeks[w]) + '-' + str(year)
        week_scores = Boxscores(weeks[w],year)
        week_games_df = pd.DataFrame()
        for g in range(len(week_scores.games[date_string])):
            game_str = week_scores.games[date_string][g]['boxscore']
            game_stats = Boxscore(game_str)
            game_df = pd.DataFrame(week_scores.games[date_string][g], index = [0])
            away_team_df, home_team_df = game_data(game_df,game_stats)
            away_team_df['week'] = weeks[w]
            home_team_df['week'] = weeks[w]
            week_games_df = pd.concat([week_games_df,away_team_df])
            week_games_df = pd.concat([week_games_df,home_team_df])
        weeks_games_df = pd.concat([weeks_games_df,week_games_df])
    return weeks_games_df

def get_schedule(year):
    weeks = list(range(1,19))
    schedule_df = pd.DataFrame()
    for w in range(len(weeks)):
        date_string = str(weeks[w]) + '-' + str(year)
        week_scores = Boxscores(weeks[w],year)
        week_games_df = pd.DataFrame()
        for g in range(len(week_scores.games[date_string])):
            game = pd.DataFrame(week_scores.games[date_string][g], index = [0])[['away_name', 'away_abbr','home_name', 'home_abbr','winning_name', 'winning_abbr' ]]
            game['week'] = weeks[w]
            week_games_df = pd.concat([week_games_df,game])
        schedule_df = pd.concat([schedule_df, week_games_df]).reset_index().drop(columns = 'index') 
    return schedule_df 

def agg_weekly_data(schedule_df,weeks_games_df,current_week,weeks):
    schedule_df = schedule_df[schedule_df.week < current_week]
    agg_games_df = pd.DataFrame()
    for w in range(1,len(weeks)):
        games_df = schedule_df[schedule_df.week == weeks[w]]
        agg_weekly_df = weeks_games_df[weeks_games_df.week < weeks[w]].drop(columns = ['score','week','game_won', 'game_lost']).groupby(by=["team_name", "team_abbr"]).mean().reset_index()
        win_loss_df = weeks_games_df[weeks_games_df.week < weeks[w]][["team_name", "team_abbr",'game_won', 'game_lost']].groupby(by=["team_name", "team_abbr"]).sum().reset_index()
        win_loss_df['win_perc'] = win_loss_df['game_won'] / (win_loss_df['game_won'] + win_loss_df['game_lost'])
        win_loss_df = win_loss_df.drop(columns = ['game_won', 'game_lost'])

        try:
            agg_weekly_df['fourth_down_perc'] = agg_weekly_df['fourth_down_conversions'] / agg_weekly_df['fourth_down_attempts']  
        except ZeroDivisionError:
            agg_weekly_df['fourth_down_perc'] = 0 
        agg_weekly_df['fourth_down_perc'] = agg_weekly_df['fourth_down_perc'].fillna(0)

        try:
            agg_weekly_df['third_down_perc'] = agg_weekly_df['third_down_conversions'] / agg_weekly_df['third_down_attempts']  
        except ZeroDivisionError:
            agg_weekly_df['third_down_perc'] = 0
        agg_weekly_df['third_down_perc'] = agg_weekly_df['third_down_perc'].fillna(0)  

        agg_weekly_df = agg_weekly_df.drop(columns = ['fourth_down_attempts', 'fourth_down_conversions', 'third_down_attempts', 'third_down_conversions'])
        agg_weekly_df = pd.merge(win_loss_df,agg_weekly_df,left_on = ['team_name', 'team_abbr'], right_on = ['team_name', 'team_abbr'])

        away_df = pd.merge(games_df,agg_weekly_df,how = 'inner', left_on = ['away_name', 'away_abbr'], right_on = ['team_name', 'team_abbr']).drop(columns = ['team_name', 'team_abbr']).rename(columns = {
                'win_perc': 'away_win_perc',
               'first_downs': 'away_first_downs', 'fumbles': 'away_fumbles', 'fumbles_lost':'away_fumbles_lost', 'interceptions':'away_interceptions',
               'net_pass_yards': 'away_net_pass_yards', 'pass_attempts':'away_pass_attempts', 'pass_completions':'away_pass_completions',
               'pass_touchdowns':'away_pass_touchdowns', 'pass_yards':'away_pass_yards', 'penalties':'away_penalties', 'points':'away_points', 'rush_attempts':'away_rush_attempts',
               'rush_touchdowns':'away_rush_touchdowns', 'rush_yards':'away_rush_yards', 'time_of_possession':'away_time_of_possession', 'times_sacked':'away_times_sacked',
               'total_yards':'away_total_yards', 'turnovers':'away_turnovers', 'yards_from_penalties':'away_yards_from_penalties',
               'yards_lost_from_sacks': 'away_yards_lost_from_sacks', 'fourth_down_perc':'away_fourth_down_perc', 'third_down_perc':'away_third_down_perc'})

        home_df = pd.merge(games_df,agg_weekly_df,how = 'inner', left_on = ['home_name', 'home_abbr'], right_on = ['team_name', 'team_abbr']).drop(columns = ['team_name', 'team_abbr']).rename(columns = {
                'win_perc': 'home_win_perc',
               'first_downs': 'home_first_downs', 'fumbles': 'home_fumbles', 'fumbles_lost':'home_fumbles_lost', 'interceptions':'home_interceptions',
               'net_pass_yards': 'home_net_pass_yards', 'pass_attempts':'home_pass_attempts', 'pass_completions':'home_pass_completions',
               'pass_touchdowns':'home_pass_touchdowns', 'pass_yards':'home_pass_yards', 'penalties':'home_penalties', 'points':'home_points', 'rush_attempts':'home_rush_attempts',
               'rush_touchdowns':'home_rush_touchdowns', 'rush_yards':'home_rush_yards', 'time_of_possession':'home_time_of_possession', 'times_sacked':'home_times_sacked',
               'total_yards':'home_total_yards', 'turnovers':'home_turnovers', 'yards_from_penalties':'home_yards_from_penalties',
               'yards_lost_from_sacks': 'home_yards_lost_from_sacks', 'fourth_down_perc':'home_fourth_down_perc', 'third_down_perc':'home_third_down_perc'})

        agg_weekly_df = pd.merge(away_df,home_df,left_on = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'winning_name',
               'winning_abbr', 'week'], right_on = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'winning_name',
               'winning_abbr', 'week'])

        agg_weekly_df['win_perc_dif'] = agg_weekly_df['away_win_perc'] - agg_weekly_df['home_win_perc']
        agg_weekly_df['first_downs_dif'] = agg_weekly_df['away_first_downs'] - agg_weekly_df['home_first_downs']
        agg_weekly_df['fumbles_dif'] = agg_weekly_df['away_fumbles'] - agg_weekly_df['home_fumbles']
        agg_weekly_df['interceptions_dif'] = agg_weekly_df['away_interceptions'] - agg_weekly_df['home_interceptions']
        agg_weekly_df['net_pass_yards_dif'] = agg_weekly_df['away_net_pass_yards'] - agg_weekly_df['home_net_pass_yards']
        agg_weekly_df['pass_attempts_dif'] = agg_weekly_df['away_pass_attempts'] - agg_weekly_df['home_pass_attempts']
        agg_weekly_df['pass_completions_dif'] = agg_weekly_df['away_pass_completions'] - agg_weekly_df['home_pass_completions']
        agg_weekly_df['pass_touchdowns_dif'] = agg_weekly_df['away_pass_touchdowns'] - agg_weekly_df['home_pass_touchdowns']
        agg_weekly_df['pass_yards_dif'] = agg_weekly_df['away_pass_yards'] - agg_weekly_df['home_pass_yards']
        agg_weekly_df['penalties_dif'] = agg_weekly_df['away_penalties'] - agg_weekly_df['home_penalties']
        agg_weekly_df['points_dif'] = agg_weekly_df['away_points'] - agg_weekly_df['home_points']
        agg_weekly_df['rush_attempts_dif'] = agg_weekly_df['away_rush_attempts'] - agg_weekly_df['home_rush_attempts']
        agg_weekly_df['rush_touchdowns_dif'] = agg_weekly_df['away_rush_touchdowns'] - agg_weekly_df['home_rush_touchdowns']
        agg_weekly_df['rush_yards_dif'] = agg_weekly_df['away_rush_yards'] - agg_weekly_df['home_rush_yards']
        agg_weekly_df['time_of_possession_dif'] = agg_weekly_df['away_time_of_possession'] - agg_weekly_df['home_time_of_possession']
        agg_weekly_df['times_sacked_dif'] = agg_weekly_df['away_times_sacked'] - agg_weekly_df['home_times_sacked']
        agg_weekly_df['total_yards_dif'] = agg_weekly_df['away_total_yards'] - agg_weekly_df['home_total_yards']
        agg_weekly_df['turnovers_dif'] = agg_weekly_df['away_turnovers'] - agg_weekly_df['home_turnovers']
        agg_weekly_df['yards_from_penalties_dif'] = agg_weekly_df['away_yards_from_penalties'] - agg_weekly_df['home_yards_from_penalties']
        agg_weekly_df['yards_lost_from_sacks_dif'] = agg_weekly_df['away_yards_lost_from_sacks'] - agg_weekly_df['home_yards_lost_from_sacks']
        agg_weekly_df['fourth_down_perc_dif'] = agg_weekly_df['away_fourth_down_perc'] - agg_weekly_df['home_fourth_down_perc']
        agg_weekly_df['third_down_perc_dif'] = agg_weekly_df['away_third_down_perc'] - agg_weekly_df['home_third_down_perc']

        agg_weekly_df = agg_weekly_df.drop(columns = ['away_win_perc',
               'away_first_downs', 'away_fumbles', 'away_fumbles_lost', 'away_interceptions',
               'away_net_pass_yards', 'away_pass_attempts','away_pass_completions',
               'away_pass_touchdowns', 'away_pass_yards', 'away_penalties', 'away_points', 'away_rush_attempts',
               'away_rush_touchdowns', 'away_rush_yards', 'away_time_of_possession', 'away_times_sacked',
               'away_total_yards', 'away_turnovers', 'away_yards_from_penalties',
               'away_yards_lost_from_sacks','away_fourth_down_perc', 'away_third_down_perc','home_win_perc',
               'home_first_downs', 'home_fumbles', 'home_fumbles_lost', 'home_interceptions',
               'home_net_pass_yards', 'home_pass_attempts','home_pass_completions',
               'home_pass_touchdowns', 'home_pass_yards', 'home_penalties', 'home_points', 'home_rush_attempts',
               'home_rush_touchdowns', 'home_rush_yards', 'home_time_of_possession', 'home_times_sacked',
               'home_total_yards', 'home_turnovers', 'home_yards_from_penalties',
               'home_yards_lost_from_sacks','home_fourth_down_perc', 'home_third_down_perc'])
        
        if (agg_weekly_df['winning_name'].isnull().values.any() and weeks[w] > 3):
            agg_weekly_df['result'] = np.nan
            print(f"Week {weeks[w]} games have not finished yet.")
        else:
            agg_weekly_df['result'] = agg_weekly_df['winning_name'] == agg_weekly_df['away_name']
            agg_weekly_df['result'] = agg_weekly_df['result'].astype('float')
        agg_weekly_df = agg_weekly_df.drop(columns = ['winning_name', 'winning_abbr'])
        agg_games_df = pd.concat([agg_games_df, agg_weekly_df])
    agg_games_df = agg_games_df.reset_index().drop(columns = 'index')
    # What is .drop(index = 20) doing?
    agg_games_df = agg_games_df.drop(index = 20, axis=0)
    return agg_games_df

def get_elo():
    elo_df = pd.read_csv('https://projects.fivethirtyeight.com/nfl-api/nfl_elo_latest.csv')
    elo_df = elo_df.drop(columns = ['season','neutral' ,'playoff', 'elo_prob1', 'elo_prob2', 'elo1_post', 'elo2_post',
           'qbelo1_pre', 'qbelo2_pre', 'qb1', 'qb2', 'qb1_adj', 'qb2_adj', 'qbelo_prob1', 'qbelo_prob2',
           'qb1_game_value', 'qb2_game_value', 'qb1_value_post', 'qb2_value_post',
           'qbelo1_post', 'qbelo2_post', 'score1', 'score2', 'quality', 'importance', 'total_rating'])
    elo_df.date = pd.to_datetime(elo_df.date)
    elo_df = elo_df[elo_df.date < '01-05-2022']

    elo_df['team1'] = elo_df['team1'].replace(['KC', 'JAX', 'CAR', 'BAL', 'BUF', 'MIN', 'DET', 'ATL', 'NE', 'WSH',
           'CIN', 'NO', 'SF', 'LAR', 'NYG', 'DEN', 'CLE', 'IND', 'TEN', 'NYJ',
           'TB', 'MIA', 'PIT', 'PHI', 'GB', 'CHI', 'DAL', 'ARI', 'LAC', 'HOU',
           'SEA', 'OAK'],
            ['kan','jax','car', 'rav', 'buf', 'min', 'det', 'atl', 'nwe', 'was', 
            'cin', 'nor', 'sfo', 'ram', 'nyg', 'den', 'cle', 'clt', 'oti', 'nyj', 
             'tam','mia', 'pit', 'phi', 'gnb', 'chi', 'dal', 'crd', 'sdg', 'htx', 'sea', 'rai' ])
    elo_df['team2'] = elo_df['team2'].replace(['KC', 'JAX', 'CAR', 'BAL', 'BUF', 'MIN', 'DET', 'ATL', 'NE', 'WSH',
           'CIN', 'NO', 'SF', 'LAR', 'NYG', 'DEN', 'CLE', 'IND', 'TEN', 'NYJ',
           'TB', 'MIA', 'PIT', 'PHI', 'GB', 'CHI', 'DAL', 'ARI', 'LAC', 'HOU',
           'SEA', 'OAK'],
            ['kan','jax','car', 'rav', 'buf', 'min', 'det', 'atl', 'nwe', 'was', 
            'cin', 'nor', 'sfo', 'ram', 'nyg', 'den', 'cle', 'clt', 'oti', 'nyj', 
             'tam','mia', 'pit', 'phi', 'gnb', 'chi', 'dal', 'crd', 'sdg', 'htx', 'sea', 'rai' ])
    return elo_df

def merge_rankings(agg_games_df,elo_df):
    agg_games_df = pd.merge(agg_games_df, elo_df, how = 'inner', left_on = ['home_abbr', 'away_abbr'], right_on = ['team1', 'team2']).drop(columns = ['date','team1', 'team2'])
    agg_games_df['elo_dif'] = agg_games_df['elo2_pre'] - agg_games_df['elo1_pre']
    agg_games_df['qb_dif'] = agg_games_df['qb2_value_pre'] - agg_games_df['qb1_value_pre']
    agg_games_df = agg_games_df.drop(columns = ['elo1_pre', 'elo2_pre', 'qb1_value_pre', 'qb2_value_pre'])
    return agg_games_df

# def prep_test_train(current_week,weeks,year):
#     current_week = current_week + 1
#     schedule_df  = get_schedule(year)
#     weeks_games_df = game_data_up_to_week(weeks,year)
#     agg_games_df = agg_weekly_data(schedule_df,weeks_games_df,current_week,weeks)
#     elo_df = get_elo()
#     agg_games_df = merge_rankings(agg_games_df, elo_df)
#     train_df = agg_games_df[agg_games_df.result.notna()]
#     current_week = current_week - 1
#     test_df = agg_games_df[agg_games_df.week == current_week]
#     return test_df, train_df

def prep_test_train(current_week,weeks,year):
    current_week = current_week + 1
    schedule_df  = get_schedule(year)
    weeks_games_df = game_data_up_to_week(weeks,year)
    agg_games_df = agg_weekly_data(schedule_df,weeks_games_df,current_week,weeks)
    elo_df = get_elo()
    agg_games_df = merge_rankings(agg_games_df, elo_df)
    train_df = agg_games_df[agg_games_df.result.notna()]
    current_week = current_week - 1
    test_df = agg_games_df[agg_games_df.week == current_week]
    return test_df, train_df

def display(y_pred,X_test):
    for g in range(len(y_pred)):
        win_prob = round(y_pred[g],2)
        away_team = X_test.reset_index().drop(columns = 'index').loc[g,'away_name']
        home_team = X_test.reset_index().drop(columns = 'index').loc[g,'home_name']
        print(f'The {away_team} have a probability of {win_prob} of beating the {home_team}.')
    

In [111]:
#game = game_data_up_to_week(range(1,4),2021)
#game


In [3]:
current_week = 13
weeks = list(range(1,current_week + 1))
year = 2021

pred_games_df, comp_games_df = prep_test_train(current_week,weeks,year)

Week 10 games have not finished yet.
Week 13 games have not finished yet.


In [4]:
pred_games_df

Unnamed: 0,away_name,away_abbr,home_name,home_abbr,week,win_perc_dif,first_downs_dif,fumbles_dif,interceptions_dif,net_pass_yards_dif,pass_attempts_dif,pass_completions_dif,pass_touchdowns_dif,pass_yards_dif,penalties_dif,points_dif,rush_attempts_dif,rush_touchdowns_dif,rush_yards_dif,time_of_possession_dif,times_sacked_dif,total_yards_dif,turnovers_dif,yards_from_penalties_dif,yards_lost_from_sacks_dif,fourth_down_perc_dif,third_down_perc_dif,result,elo_dif,qb_dif
163,Dallas Cowboys,dal,New Orleans Saints,nor,13,0.181818,4.636364,0.727273,0.090909,94.181818,7.181818,8.181818,0.090909,93.272727,2.363636,6.272727,-0.454545,0.090909,16.272727,86.181818,0.090909,110.454545,0.272727,25.636364,-0.909091,-0.062745,0.043014,,-39.950244,159.962301
164,Arizona Cardinals,crd,Chicago Bears,chi,13,0.454545,4.090909,0.636364,-0.181818,86.090909,4.181818,6.727273,1.090909,80.0,-0.090909,11.909091,2.0,0.727273,-3.272727,166.909091,-1.0,82.818182,-0.363636,-0.909091,-6.090909,0.335165,0.100212,,171.867039,93.922882
165,Tampa Bay Buccaneers,tam,Atlanta Falcons,atl,13,0.272727,5.545455,-0.181818,-0.454545,80.454545,6.727273,5.272727,1.272727,75.818182,0.909091,13.454545,-1.181818,0.545455,10.818182,118.909091,-0.545455,91.272727,-0.272727,15.363636,-4.636364,0.188312,0.079343,,237.310235,103.270532
166,Philadelphia Eagles,phi,New York Jets,nyj,13,0.143939,0.863636,0.25,-1.060606,-52.015152,-9.045455,-6.068182,-0.280303,-63.257576,-0.242424,7.242424,8.825758,0.772727,71.916667,-49.515152,-1.068182,19.901515,-1.098485,-2.174242,-11.242424,-0.180952,0.049692,,145.251604,103.227714
167,New York Giants,nyg,Miami Dolphins,mia,13,-0.05303,0.090909,-0.469697,-0.015152,-4.030303,-2.878788,-2.772727,-0.25,-6.939394,-0.848485,-1.136364,-0.19697,-0.113636,10.651515,-63.128788,-0.325758,6.621212,-0.393939,-4.636364,-2.909091,-0.129412,-0.058413,,-57.912337,-37.978877
168,Denver Broncos,den,Kansas City Chiefs,kan,13,-0.090909,-5.454545,-1.0,-0.363636,-66.727273,-9.0,-5.181818,-0.909091,-58.272727,-1.272727,-4.818182,0.909091,0.090909,2.818182,32.272727,1.0,-63.909091,-0.909091,-9.0,8.454545,0.030303,-0.13834,,-173.767258,-78.641705
169,Indianapolis Colts,clt,Houston Texans,htx,13,0.318182,7.454545,-0.030303,-0.590909,35.287879,1.94697,0.454545,0.659091,27.492424,-2.159091,13.424242,3.734848,0.871212,66.227273,231.007576,-1.068182,101.515152,-0.386364,-13.219697,-7.795455,0.039683,0.027572,,231.468424,37.359951
170,Minnesota Vikings,min,Detroit Lions,det,13,0.454545,1.909091,0.272727,-0.454545,72.090909,2.363636,1.818182,1.181818,61.272727,-0.090909,9.727273,3.090909,-0.272727,3.727273,85.363636,-1.181818,75.818182,-0.545455,8.636364,-10.818182,0.208075,0.054865,,239.154443,100.393607
171,Los Angeles Chargers,sdg,Cincinnati Bengals,cin,13,-0.090909,3.727273,-0.545455,-0.181818,37.181818,8.636364,4.636364,0.090909,30.545455,3.363636,-3.272727,-4.818182,-0.272727,-8.909091,-165.909091,-0.818182,28.272727,-0.272727,31.181818,-6.636364,0.028571,0.057143,,-46.218841,84.883848
172,Washington Football Team,was,Las Vegas Raiders,rai,13,-0.090909,1.454545,0.272727,0.090909,-72.181818,-4.0,-2.818182,-0.090909,-71.636364,-2.454545,-2.727273,5.818182,-0.181818,36.454545,81.818182,-0.090909,-35.727273,0.363636,-20.454545,0.545455,0.058462,0.048097,,15.185078,-68.820114


In [5]:
comp_games_df

Unnamed: 0,away_name,away_abbr,home_name,home_abbr,week,win_perc_dif,first_downs_dif,fumbles_dif,interceptions_dif,net_pass_yards_dif,pass_attempts_dif,pass_completions_dif,pass_touchdowns_dif,pass_yards_dif,penalties_dif,points_dif,rush_attempts_dif,rush_touchdowns_dif,rush_yards_dif,time_of_possession_dif,times_sacked_dif,total_yards_dif,turnovers_dif,yards_from_penalties_dif,yards_lost_from_sacks_dif,fourth_down_perc_dif,third_down_perc_dif,result,elo_dif,qb_dif
0,New York Giants,nyg,Washington Football Team,was,2,0.000000,4.000000,-2.000000,0.000000,121.000000,16.000000,8.000000,0.0,132.000000,-4.000000,-3.000000,-7.000000,1.000000,-66.000000,55.000000,1.000000,55.000000,0.000000,-9.000000,11.000000,-0.666667,0.200000,0.0,-50.848658,100.316973
1,Cincinnati Bengals,cin,Chicago Bears,chi,2,1.000000,-4.000000,-1.000000,-1.000000,29.000000,-13.000000,-9.000000,2.0,45.000000,0.000000,13.000000,10.000000,-1.000000,15.000000,-23.000000,2.000000,44.000000,-2.000000,-20.000000,16.000000,0.666667,-0.240260,0.0,-51.742731,34.616025
2,New Orleans Saints,nor,Carolina Panthers,car,2,0.000000,4.000000,-1.000000,0.000000,-119.000000,-14.000000,-9.000000,4.0,-128.000000,0.000000,19.000000,12.000000,-1.000000,60.000000,196.000000,-1.000000,-59.000000,-1.000000,-2.000000,-9.000000,1.000000,0.214286,0.0,204.627654,54.983874
3,Las Vegas Raiders,rai,Pittsburgh Steelers,pit,2,0.000000,10.000000,0.000000,1.000000,232.000000,24.000000,16.000000,1.0,247.000000,5.000000,10.000000,0.000000,2.000000,7.000000,445.000000,1.000000,239.000000,1.000000,30.000000,15.000000,0.000000,0.133333,1.0,-62.282436,-14.386838
4,San Francisco 49ers,sfo,Philadelphia Eagles,phi,2,0.000000,-3.000000,0.000000,0.000000,50.000000,-9.000000,-9.000000,-1.0,55.000000,-7.000000,9.000000,-3.000000,1.000000,-42.000000,-270.000000,0.000000,8.000000,2.000000,-10.000000,5.000000,0.000000,-0.128205,1.0,36.874709,-13.318243
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158,Los Angeles Chargers,sdg,Denver Broncos,den,12,0.100000,3.800000,-0.100000,0.200000,49.800000,5.300000,2.700000,0.8,42.500000,1.800000,6.000000,-1.600000,0.300000,-5.400000,-163.900000,-1.200000,44.400000,-0.100000,16.600000,-7.300000,-0.055556,0.105924,0.0,74.721833,99.202771
159,Minnesota Vikings,min,San Francisco 49ers,sfo,12,0.000000,0.100000,0.000000,-0.400000,31.700000,7.400000,6.100000,0.6,32.000000,1.400000,0.900000,-1.100000,-0.700000,-4.000000,41.600000,-0.500000,27.700000,-0.800000,5.300000,0.300000,0.155844,0.009179,0.0,6.369632,92.115953
160,Los Angeles Rams,ram,Green Bay Packers,gnb,12,-0.027273,0.700000,-0.490909,0.445455,46.300000,3.154545,2.609091,0.4,44.918182,0.163636,4.645455,-1.336364,0.063636,-10.100000,-205.163636,-0.500000,36.200000,0.281818,-3.627273,-1.381818,-0.071795,-0.002783,0.0,-77.381374,-42.459544
161,Cleveland Browns,cle,Baltimore Ravens,rav,12,-0.154545,-3.772727,-0.027273,-0.354545,-45.963636,-6.236364,-4.272727,-0.4,-47.590909,1.472727,-2.518182,-1.690909,0.345455,5.818182,-187.427273,-0.581818,-40.145455,-0.300000,15.800000,-1.627273,-0.257576,0.020200,0.0,-51.409517,-103.082625


In [6]:
train_df = comp_games_df
test_df = pred_games_df

X_train = train_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_train = train_df[['result']] 
X_test = test_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_test = test_df[['result']]

In [7]:
from sklearn.linear_model import LogisticRegression

# Figure out what these Logistic Regression settings do.
# penalty='l1' - Imposes a penalty to the logistic model for having too many variables. Shrinks the coefficients of less contributive variables. Results in more sparse data.
# dual=False - Duality vs Primal
# tol=0.001 - Tolerance for stopping criteria. This tells scikit to stop searching for a minimum (or maximum) once some tolerance is achieved, i.e. once you're close enough.
# fit_intercept=True - Sets te y-intercept to the line of best fit (False would keep it as a y-intercept of 0.)
# intercept_scaling=1 - This is the default. description is over my head.
# class_weight = 'balanced' - “balanced” mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data
clf = LogisticRegression(penalty='l1', dual=False, tol=0.001, C=1.0, fit_intercept=True, 
                   intercept_scaling=1, class_weight='balanced', random_state=None, 
                   solver='liblinear', max_iter=1000, multi_class='ovr', verbose=0)

clf.fit(X_train, np.ravel(y_train.values))
y_pred = clf.predict_proba(X_test)
y_pred = y_pred[:,1]

display(y_pred,test_df)

The Dallas Cowboys have a probability of 0.83 of beating the New Orleans Saints.
The Arizona Cardinals have a probability of 0.79 of beating the Chicago Bears.
The Tampa Bay Buccaneers have a probability of 0.86 of beating the Atlanta Falcons.
The Philadelphia Eagles have a probability of 0.39 of beating the New York Jets.
The New York Giants have a probability of 0.44 of beating the Miami Dolphins.
The Denver Broncos have a probability of 0.33 of beating the Kansas City Chiefs.
The Indianapolis Colts have a probability of 0.92 of beating the Houston Texans.
The Minnesota Vikings have a probability of 0.68 of beating the Detroit Lions.
The Los Angeles Chargers have a probability of 0.25 of beating the Cincinnati Bengals.
The Washington Football Team have a probability of 0.45 of beating the Las Vegas Raiders.
The Jacksonville Jaguars have a probability of 0.17 of beating the Los Angeles Rams.
The Baltimore Ravens have a probability of 0.87 of beating the Pittsburgh Steelers.
The San Fr

In [8]:
y_pred

array([0.8261252 , 0.79365135, 0.85976659, 0.3920749 , 0.43596273,
       0.33346208, 0.91564418, 0.68493368, 0.25225801, 0.45292194,
       0.16889699, 0.87443321, 0.73413279, 0.36916495])

In [9]:
# Create dataframe to hold prediction results
results_df = test_df[['week','away_name','home_name']]
results_df['away_team_win_%'] = y_pred
results_df['home_team_win_%'] = 1 - results_df['away_team_win_%']
results_df['predicted_winner'] = 0

# Loop through results dataframe and add predicted_winner name to column
for index, row in results_df.iterrows():
    if results_df.loc[index,'away_team_win_%'] > results_df.loc[index,'home_team_win_%']:
        results_df.loc[index,'predicted_winner'] = results_df.loc[index,'away_name']
    else:
        results_df.loc[index,'predicted_winner'] = results_df.loc[index,'home_name']

# Set Index to the 'week' column
results_df=results_df.set_index('week')      

# Use get_schedule function to pull dataframe that shows actual weekly winners. Includes all weeks.
weekly_winner = get_schedule('2021')
weekly_winner = weekly_winner.drop(columns=['away_abbr','home_abbr','winning_abbr'])
weekly_winner = weekly_winner.rename(columns={'away_name':'ww_away_name','home_name':'ww_home_name'})

# Slices weekly_winner dataframe based on current week model is predicting.
current_week_wins = weekly_winner.loc[weekly_winner['week'] == current_week]
current_week_wins = current_week_wins.set_index('week')

# Sort both dataframes by away_name
results_df = results_df.sort_values(by='away_name')
current_week_wins = current_week_wins.sort_values(by='ww_away_name')

# Combine results & weekly_winner df's. Drop dupe columns and rename winning_name to 'actual_winner'
actual_results_df = (pd.concat([results_df,current_week_wins],axis=1)).drop(columns=['ww_away_name','ww_home_name']).rename(columns={'winning_name':'actual_winner'})
actual_results_df




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[r

Unnamed: 0_level_0,away_name,home_name,away_team_win_%,home_team_win_%,predicted_winner,actual_winner
week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
13,Arizona Cardinals,Chicago Bears,0.793651,0.206349,Arizona Cardinals,
13,Baltimore Ravens,Pittsburgh Steelers,0.874433,0.125567,Baltimore Ravens,
13,Dallas Cowboys,New Orleans Saints,0.826125,0.173875,Dallas Cowboys,Dallas Cowboys
13,Denver Broncos,Kansas City Chiefs,0.333462,0.666538,Kansas City Chiefs,
13,Indianapolis Colts,Houston Texans,0.915644,0.084356,Indianapolis Colts,
13,Jacksonville Jaguars,Los Angeles Rams,0.168897,0.831103,Los Angeles Rams,
13,Los Angeles Chargers,Cincinnati Bengals,0.252258,0.747742,Cincinnati Bengals,
13,Minnesota Vikings,Detroit Lions,0.684934,0.315066,Minnesota Vikings,
13,New England Patriots,Buffalo Bills,0.369165,0.630835,Buffalo Bills,
13,New York Giants,Miami Dolphins,0.435963,0.564037,Miami Dolphins,


In [13]:
# Export actual_results_df to csv based on the current_week predicted.
file_path = str(f'Exports/week_{current_week}_base_results.csv')
actual_results_df.to_csv(Path(file_path))

In [14]:
from sklearn.metrics import accuracy_score

accuracy_score(y_test,np.round(y_pred))

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [102]:
# Create function to read all csvs back into file for final analysis.
# Attempted...
'''def read_csvs():
    weeks = range(4,14)
    final_df = pd.DataFrame()
    for week in weeks:
        csv_string = str(f'Exports/week_{weeks[week]}_base_results.csv')
        df = pd.read_csv('csv_string')
        final_df.append(df)
    return final_df'''
        

In [121]:
# Read in all csvs
w4 = pd.read_csv(Path('Exports/week_4_base_results.csv'), index_col='week')
w5 = pd.read_csv(Path('Exports/week_5_base_results.csv'), index_col='week')
w6 = pd.read_csv(Path('Exports/week_6_base_results.csv'), index_col='week')
w7 = pd.read_csv(Path('Exports/week_7_base_results.csv'), index_col='week')
w8 = pd.read_csv(Path('Exports/week_8_base_results.csv'), index_col='week')
w9 = pd.read_csv(Path('Exports/week_9_base_results.csv'), index_col='week')
w10 = pd.read_csv(Path('Exports/week_10_base_results.csv'), index_col='week')
w11 = pd.read_csv(Path('Exports/week_11_base_results.csv'), index_col='week')
w12 = pd.read_csv(Path('Exports/week_12_base_results.csv'), index_col='week')
#w13 = pd.read_csv(Path('Exports/week_13_base_results.csv'), index_col='week')

In [129]:
# Combine csvs so far
season_results = pd.DataFrame()
season_results = season_results.append(w4)
season_results = season_results.append(w5)
season_results = season_results.append(w6)
season_results = season_results.append(w7)
season_results = season_results.append(w8)
season_results = season_results.append(w9)
season_results = season_results.append(w10)
season_results = season_results.append(w11)
season_results = season_results.append(w12)
season_results

Unnamed: 0_level_0,away_name,home_name,away_team_win_%,home_team_win_%,predicted_winner,actual_winner
week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4,Arizona Cardinals,Los Angeles Rams,0.995219,0.004781,Arizona Cardinals,Arizona Cardinals
4,Baltimore Ravens,Denver Broncos,0.977060,0.022940,Baltimore Ravens,Baltimore Ravens
4,Carolina Panthers,Dallas Cowboys,0.158786,0.841214,Dallas Cowboys,Dallas Cowboys
4,Cleveland Browns,Minnesota Vikings,0.991119,0.008881,Cleveland Browns,Cleveland Browns
4,Detroit Lions,Chicago Bears,0.439891,0.560109,Chicago Bears,Chicago Bears
...,...,...,...,...,...,...
12,Philadelphia Eagles,New York Giants,0.442395,0.557605,New York Giants,New York Giants
12,Pittsburgh Steelers,Cincinnati Bengals,0.156439,0.843561,Cincinnati Bengals,Cincinnati Bengals
12,Seattle Seahawks,Washington Football Team,0.333895,0.666105,Washington Football Team,Washington Football Team
12,Tampa Bay Buccaneers,Indianapolis Colts,0.526620,0.473380,Tampa Bay Buccaneers,Tampa Bay Buccaneers


In [130]:
for index, row in season_results.iterrows():
    if season_results.loc[index,'predicted_winner'] == season_results.loc[index,'actual_winner']:
        season_results.loc[index,'correct_prediction'] = 1
    else:
        season_results.loc[index,'correct_prediction'] = 0

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().